diff --git "a/experiments/results/detailed_results.json" "b/experiments/results/detailed_results.json" new file mode 100644--- /dev/null +++ "b/experiments/results/detailed_results.json" @@ -0,0 +1,58637 @@ +{ + "sst2": { + "train": { + "results": [ + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nhide new secretions from the parental units \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.5134, 0.6513, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.1750, 8.3152, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 10.9637, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 12.8546, 12.7735, 12.8680, 12.9621, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.2668, 13.1878, 13.2796,\n 13.3710, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.6667,\n 13.5897, 13.6789, 13.7679, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncontains no wit , only labored gags \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.0401, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.7377, -0.6058, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.6469, 8.8029, 8.6424, 8.7970, 8.9496,\n 9.1002, 8.9456, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.0701,\n 10.2036, 10.0673, 10.2000, 10.3314, 10.4614, 10.3288, 10.1982, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.7084, 10.8328, 10.9560, 11.0782,\n 11.1994, 11.3196, 11.4388, 11.3163, 11.4349, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.5476, 11.4311, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.5462, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.3747, 13.4758, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.9728, 13.8695, 13.9675, 13.8654, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.2481, 14.3434, 14.4381, 14.5324, 14.6262,\n 14.7195, 14.8124, 14.9048, 14.9967, 15.0882, 15.1792, 15.2698, 15.1727,\n 15.2631, 15.1669, 15.2570, 15.3467, 15.4360, 15.5249, 15.6133, 15.5188,\n 15.6070, 15.6949, 15.7823, 15.8694, 15.9561, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.4684, 16.3764, 16.4607, 16.3695, 16.4536, 16.5374,\n 16.6208, 16.7039, 16.7866, 16.6969, 16.7794, 16.8616, 16.9435, 17.0251,\n 17.1064, 17.1873, 17.2680, 17.3483, 17.4284, 17.5081, 17.5875, 17.5000,\n 17.5793, 17.4925, 17.5716, 17.6504, 17.7290, 17.8072, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthat loves its characters and communicates something rather beautiful about human nature \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 7.0133, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.7629, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.5444, 11.6425, 11.5655, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nremains utterly satisfied to remain the same throughout \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.4008, -1.4393, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.0928, 7.9754, 8.1196, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.6418, 8.7758, 8.9086, 8.8007, 8.9324, 8.8260, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.0067, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.5311, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 11.9737, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.0341, 11.9504, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.1805, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.3100, 12.2298, 12.3263, 12.2467, 12.1677, 12.0893, 12.1854,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.3163, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.2767, 12.3705, 12.2954, 12.2209, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\non the worst revenge-of-the-nerds clich\u00e9s the filmmakers could dredge up \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.8%", + "z-score": "-2.02", + "p value": "0.978", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, 0.1191, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.0990, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.1455, 9.2828, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.7065, 9.5876, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.0242, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.7045, 11.6041, 11.7130, 11.6139, 11.7222, 11.6242, 11.7320, 11.6351,\n 11.5391, 11.6465, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.2093, 13.1198, 13.2166, 13.3128, 13.2243, 13.1364, 13.2324, 13.1453,\n 13.0590, 12.9732, 12.8881, 12.9840, 13.0795, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.7706, 13.8613, 13.7803, 13.8707, 13.7904,\n 13.7106, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthat 's far too tragic to merit such superficial treatment \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.4641, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.6481, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.1807, 11.1018, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.4674, 11.5655, 11.6632,\n 11.5868, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.7169, 11.8117, 11.9060, 12.0000,\n 11.9273, 11.8551, 11.9487, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ndemonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small , personal film with an emotional wallop . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.2310, -1.0498, -1.1025, -0.9238, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.0983, -0.1469, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.1873,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.4070, 0.3607, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.7789, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.4185, 9.3320, 9.2463, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.9431, 11.0414, 11.1392, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nof saucy \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.7540, -2.7995, -2.8446, -2.6485, -2.4546, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.3094, -2.3554, -2.4010, -2.4462, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.2222, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -2.0548, -1.8843, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -1.9545, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.3822, -2.4198, -2.4572, -2.3163,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.0134, -2.0512, -2.0889,\n -2.1264, -1.9906, -2.0282, -1.8935, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -1.9635, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.5569, 6.4663, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.7049, 6.6171, 6.7536, 6.8889, 6.8019, 6.7159, 6.6308, 6.7648,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.2058, 7.3346, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.5484, 7.4661, 7.3845, 7.5094, 7.6335, 7.7567,\n 7.8791, 7.7981, 7.9196, 7.8393, 7.9600, 8.0798, 8.1989, 8.1192,\n 8.0402, 8.1585, 8.0801, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.4664, 8.3910, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.5792, 8.6903, 8.8008, 8.7270, 8.8369, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.0167, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.2276, 9.1567, 9.0863, 9.1915, 9.2961, 9.4002,\n 9.3302, 9.4338, 9.3642, 9.4673, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.0312, 9.9641, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na depressed fifteen-year-old 's suicidal poetry \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.4899, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.2218, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.0849, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.5672, 8.7045, 8.5915, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.6380, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.6643, 12.5831, 12.6785, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.6546, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.6914, 13.6155, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nare more deeply thought through than in most ` right-thinking ' films \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.7249, 11.6356, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.9883,\n 11.9024, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.2782, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.8313, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.6789, 13.7679, 13.8564, 13.7801, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ngoes to absurd lengths \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -0.9428,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.5492, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426, 4.5033, 4.3027, 4.5556,\n 4.8008, 4.6101, 4.4272, 4.6663, 4.8990, 4.7237, 4.9507, 5.1723, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.1121, 5.3199, 5.5234, 5.3708, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.0000, 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140,\n 5.6830, 5.8635, 5.7354, 5.6099, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009,\n 5.7735, 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132, 6.6075,\n 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.9945, 7.1393, 7.2827, 7.1813,\n 7.3233, 7.2232, 7.1243, 7.0268, 6.9305, 7.0711, 6.9759, 7.1152, 7.0211,\n 6.9282, 6.8364, 6.7456, 6.8834, 6.7937, 6.9303, 7.0657, 6.9768, 7.1111,\n 7.2443, 7.1563, 7.2884, 7.4194, 7.3322, 7.2459, 7.3758, 7.5048, 7.4193,\n 7.5472, 7.6742, 7.5895, 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.9209,\n 7.8384, 7.9608, 8.0824, 8.2032, 8.3231, 8.2413, 8.1602, 8.0798, 8.0002,\n 7.9212, 7.8429, 7.9619, 7.8842, 8.0024, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.8153, 7.7407, 7.8571, 7.9729, 7.8988, 8.0139, 8.1282, 8.0546, 8.1683,\n 8.2813, 8.2082, 8.1356, 8.2479, 8.3595, 8.2874, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.5848, 8.5141, 8.6226, 8.7305, 8.6603, 8.7676, 8.8744,\n 8.9806, 9.0863, 9.0164, 8.9469, 9.0520, 8.9830, 8.9145, 8.8464, 8.9509,\n 8.8832, 8.9872, 8.9199, 8.8531, 8.7867, 8.7207, 8.8240, 8.7584, 8.8612,\n 8.9635, 8.8982, 9.0000, 9.1013, 9.0364, 9.1372, 9.2376, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfor those moviegoers who complain that ` they do n't make movies like they used to anymore \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "16.2%", + "z-score": "-2.56", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.0237, -2.8180, -2.8638, -2.9092, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.6485, -2.4546, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.4444, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.1884, -2.2323, -2.2758, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.6264, -2.6640, -2.7014, -2.5560])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "149", + "Fraction of T in Greenlist": "74.9%", + "z-score": "16.2", + "p value": "1.15e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 13.0815, 12.9874, 13.0866,\n 12.9935, 13.0922, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.6781, 13.7730, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.3380, 14.4292, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.8804, 14.9689, 15.0570, 14.9707,\n 15.0585, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.6633, 15.5792, 15.6640, 15.5805, 15.6651, 15.7494, 15.8333,\n 15.9169, 16.0002, 16.0832, 16.1658, 16.2481, 16.1660, 16.2481])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe part where nothing 's happening , \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.2910, -1.1316, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.1263, -0.9759, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "193", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "54.9%", + "z-score": "9.6", + "p value": "4e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.0424, 3.3947, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 4.5556,\n 4.3644, 4.6101, 4.8488, 4.6663, 4.8990, 4.7237, 4.5547, 4.3916, 4.2339,\n 4.0814, 4.3083, 4.5301, 4.7469, 4.9592, 4.8107, 5.0186, 5.2223, 5.0779,\n 5.2778, 5.4740, 5.3333, 5.1962, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.3716, 5.2463, 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009,\n 5.7735, 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.0928, 6.2505, 6.1450, 6.0410, 5.9386, 6.0943,\n 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.7469, 6.6469, 6.7931, 6.9378,\n 6.8391, 6.7416, 6.8849, 7.0268, 6.9305, 7.0711, 6.9759, 6.8819, 7.0211,\n 6.9282, 6.8364, 6.7456, 6.8834, 7.0201, 7.1556, 7.2900, 7.2001, 7.3333,\n 7.4655, 7.3765, 7.5076, 7.6376, 7.5494, 7.4622, 7.5912, 7.7192, 7.6328,\n 7.7598, 7.6742, 7.5895, 7.7155, 7.6315, 7.5484, 7.4661, 7.5910, 7.7152,\n 7.8384, 7.9608, 7.8791, 8.0006, 8.1214, 8.0403, 8.1602, 8.2793, 8.1989,\n 8.1192, 8.2375, 8.3550, 8.2760, 8.3927, 8.3143, 8.2365, 8.3525, 8.2754,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.4664, 8.5796, 8.5041, 8.6166, 8.7284,\n 8.6535, 8.7647, 8.8752, 8.8008, 8.7270, 8.8369, 8.9461, 8.8728, 8.9815,\n 8.9086, 8.8364, 8.9444, 8.8726, 8.8013, 8.7305, 8.8379, 8.9447, 9.0510,\n 9.1567, 9.0863, 9.1915, 9.2961, 9.2261, 9.3302, 9.4338, 9.3642, 9.2952,\n 9.3982, 9.5007, 9.4320, 9.5341, 9.4658, 9.3980, 9.4995, 9.4321, 9.3651,\n 9.2986, 9.3995, 9.5000, 9.6000])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsaw how bad this movie was \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "186", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.762", + "p value": "0.777", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.6449, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.0079, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, 0.0000,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.7620])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 6.9282,\n 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.0000, 6.8876, 6.7769, 6.6679, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 6.9511, 6.8483, 6.9945, 6.8931,\n 7.0379, 6.9378, 6.8391, 6.9824, 6.8849, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.1605, 7.2904, 7.2058, 7.1220, 7.2508, 7.3786,\n 7.2956, 7.4225, 7.5484, 7.6734, 7.5910, 7.7152, 7.8384, 7.7567,\n 7.8791, 7.7981, 7.7178, 7.8393, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.0402, 8.1585, 8.0801, 8.1976, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 9.0601, 9.1694, 9.0944, 9.0200, 9.1287,\n 9.0548, 9.1629, 9.2704, 9.3774, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.7574, 9.6850, 9.6130, 9.7167, 9.6452, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 9.8746,\n 9.9752, 10.0753, 10.0061, 9.9374, 9.8691, 9.9687, 10.0679, 10.0000,\n 9.9325, 10.0312, 10.1295, 10.2273, 10.3248, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nlend some dignity to a dumb story \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.6908, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.9298, 1.8682, 1.8071, 1.9582, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.0739, 2.0140, 2.1602, 2.3054, 2.2454,\n 2.1858, 2.1268, 2.0682, 2.2111, 2.1527, 2.0948, 2.0373, 2.1783,\n 2.3183, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.0068, 2.1429, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 1.9540, 1.9013, 2.0339, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.3206, 2.2680, 2.3967, 2.3443, 2.2923, 2.2406, 2.3679, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.3868, 2.5099, 2.4597, 2.4099, 2.3603, 2.3110, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.3580, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.1444, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.5466, 3.4293, 3.6380, 3.8431, 3.7273, 3.6141, 3.8146, 3.7033,\n 3.9001, 3.7905, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461, 4.0415,\n 3.9386, 4.1219, 4.3026, 4.2008, 4.1008, 4.0024, 3.9056, 4.0825,\n 3.9869, 3.8927, 4.0667, 3.9736, 4.1451, 4.0531, 4.2222, 4.3894,\n 4.5547, 4.4630, 4.3727, 4.2836, 4.1957, 4.3580, 4.5186, 4.4313,\n 4.3451, 4.2601, 4.4182, 4.5747, 4.4901, 4.4066, 4.5611, 4.4783,\n 4.6311, 4.5491, 4.7001, 4.8497, 4.9980, 4.9163, 4.8355, 4.9820,\n 5.1273, 5.2713, 5.1908, 5.1111, 5.2535, 5.1745, 5.3156, 5.2372,\n 5.1597, 5.2992, 5.2223, 5.3606, 5.2842, 5.4212, 5.5572, 5.6921,\n 5.6160, 5.5407, 5.6743, 5.8069, 5.9386, 5.8635, 5.7892, 5.9196,\n 5.8458, 5.9752, 5.9019, 5.8292, 5.9575, 5.8853, 6.0125, 5.9409,\n 6.0671, 6.1926, 6.3172, 6.2458, 6.1750, 6.1047, 6.0351, 6.1584,\n 6.2810, 6.2116, 6.1429, 6.0746, 6.0069, 6.1283, 6.0609, 5.9941,\n 6.1146, 6.0481, 6.1677, 6.1017, 6.2205, 6.3385, 6.4559, 6.3901,\n 6.3247, 6.4413, 6.5571, 6.6724, 6.6072, 6.5424, 6.4781, 6.4143,\n 6.5285, 6.4650, 6.4019, 6.5153, 6.4526, 6.5653, 6.5029, 6.6150,\n 6.7264, 6.8373, 6.7751, 6.7132, 6.6517, 6.5906, 6.7006, 6.8101,\n 6.7492, 6.6887, 6.6285, 6.5688, 6.6774, 6.6179, 6.5588, 6.6667,\n 6.6078, 6.7151, 6.6565, 6.7632, 6.8695, 6.9752, 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe greatest musicians \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -2.7218, -2.4495, -2.5062, -2.5621, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.2743, -2.3301, -2.3851, -2.4394, -2.1997, -2.2549, -2.0207,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.6997, -1.4940, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.4857, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 8.7970, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.0924, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.3811, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.3288, 10.1982, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.4579, 10.3347, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.7125, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.2069, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.6829, 11.5788, 11.6894, 11.7992, 11.9083, 12.0167,\n 11.9144, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.2834, 13.3810, 13.4780, 13.5746, 13.6707, 13.5784, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.7772, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.4355, 14.5257, 14.6155, 14.7049, 14.7939, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.0629, 14.9786, 15.0657, 15.1524,\n 15.2387, 15.3247, 15.4103, 15.3272, 15.4126, 15.4976, 15.5823, 15.6667,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncold movie \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 0.8238, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 1.0289, 0.9631, 1.1375, 1.3101, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.2611, 1.2060, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.5614, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 0.9062, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.8866, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.6585, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.5818, 7.4853, 7.3901, 7.5258, 7.4316, 7.3386, 7.2466,\n 7.1556, 7.2900, 7.4233, 7.3333, 7.4655, 7.5967, 7.5076, 7.4194,\n 7.5494, 7.6785, 7.5912, 7.7192, 7.8463, 7.7598, 7.8859, 7.8003,\n 7.7155, 7.8406, 7.9649, 7.8808, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 10.8505, 10.9480, 11.0450, 11.1415, 11.2376, 11.3333,\n 11.2624, 11.3577, 11.2872, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwith his usual intelligence and subtlety \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.3389, 1.2710, 1.2039, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.6166, 1.5511, 1.7150, 1.6498, 1.8116,\n 1.7467, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.7767, 1.9298, 1.8682, 2.0197, 1.9582, 1.8974,\n 2.0470, 1.9863, 2.1344, 2.0739, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.1858, 2.3293, 2.2699, 2.4121, 2.3529, 2.2943, 2.4348, 2.3764,\n 2.5156, 2.4574, 2.5954, 2.5373, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.5802, 2.5238, 2.4678, 2.6014, 2.5456, 2.6781, 2.6224,\n 2.7539, 2.6984, 2.6433, 2.7735, 2.7186, 2.8478, 2.7930, 2.9212,\n 2.8666, 2.8124, 2.9394, 2.8853, 3.0114, 2.9575, 3.0827, 3.0290,\n 2.9756, 3.0997, 3.0464, 3.1696, 3.1166, 3.2389, 3.1860, 3.1334,\n 3.2547, 3.2023, 3.3228, 3.2705, 3.3902, 3.3381, 3.2863, 3.4050,\n 3.3534, 3.4713, 3.4198, 3.5370, 3.4857, 3.4346, 3.5509, 3.5000,\n 3.6156, 3.5648, 3.6797, 3.6291, 3.5787, 3.6927, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 8.9178, 9.0370, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.0134, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.4594, 10.3827, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.2864, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nredundant concept \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "163", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "14.7%", + "z-score": "-3.03", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.3604, -2.4077, -2.4546, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.6667, -2.7097, -2.7524, -2.7948, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.9317, -2.9704,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -2.8868,\n -2.9247, -2.9625, -3.0000, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -3.0706, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -3.1038,\n -3.1396, -2.9938, -3.0298])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 7.9472, 7.8296, 7.7139, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.0698, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.5475, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.5393, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 10.9176, 10.8421, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.0913, 11.0194, 10.9480, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nswimming is above all about a young woman 's face , and by casting an actress whose face projects that woman 's doubts and yearnings , it succeeds . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.5621, 2.4585, 2.6713, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 3.1743, 3.0833, 2.9938,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.1829, 3.3566,\n 3.5283, 3.6979, 3.6122, 3.5277, 3.4442, 3.3619, 3.2806, 3.2004,\n 3.1211, 3.2863, 3.4498, 3.3708, 3.2928, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.3826, 3.3075, 3.4641, 3.6193, 3.5443, 3.6977, 3.8497,\n 4.0004, 3.9254, 3.8512, 3.7778, 3.7051, 3.6332, 3.5620, 3.4915,\n 3.6389, 3.7852, 3.7148, 3.6452, 3.7897, 3.7205, 3.6519, 3.5839,\n 3.7265, 3.6590, 3.8002, 3.9404, 3.8730, 4.0119, 4.1498, 4.2866,\n 4.2191, 4.1522, 4.0859, 4.0202, 3.9549, 3.8903, 3.8262, 3.9606,\n 4.0941, 4.0301, 3.9666, 3.9036, 3.8411, 3.7791, 3.7176, 3.8490,\n 3.7878, 3.9181, 4.0476, 3.9865, 4.1150, 4.2426, 4.3695, 4.3083,\n 4.2475, 4.1872, 4.1273, 4.0678, 4.0087, 3.9501, 4.0750, 4.1992,\n 4.1406, 4.0825, 4.0247, 3.9673, 3.9104, 3.8538, 3.9762, 3.9198,\n 4.0415, 4.1624, 4.1061, 4.2262, 4.3456, 4.4644, 4.4080, 4.3519,\n 4.2962, 4.2409, 4.1859, 4.1312, 4.0768, 4.1940, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.2642, 4.2108, 4.1576, 4.2723, 4.2193, 4.3333,\n 4.4468, 4.3938, 4.5066, 4.6188, 4.7305, 4.6775, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094, 2.1004, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998, 3.5796, 3.8497, 3.6667,\n 3.4915, 3.7524, 3.5839, 3.8367, 4.0825, 3.9196, 4.1586, 4.0012, 4.2339,\n 4.4610, 4.3083, 4.5301, 4.7469, 4.5985, 4.4544, 4.3142, 4.1779, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.4259, 5.3100,\n 5.4848, 5.6573, 5.5432, 5.7133, 5.8812, 5.7689, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 6.1968, 6.0943,\n 5.9932, 5.8936, 6.0474, 5.9491, 6.1012, 6.0041, 6.1546, 6.3035, 6.2075,\n 6.1128, 6.2601, 6.1664, 6.0740, 5.9827, 6.1283, 6.2725, 6.1820, 6.0927,\n 6.2354, 6.1470, 6.2883, 6.4283, 6.3408, 6.4795, 6.3928, 6.5303, 6.6667,\n 6.5807, 6.7159, 6.8500, 6.7648, 6.6804, 6.5970, 6.5144, 6.6471, 6.5653,\n 6.6968, 6.8274, 6.9570, 6.8757, 7.0043, 7.1319, 7.0513, 7.1779, 7.3037,\n 7.2236, 7.1443, 7.2691, 7.3930, 7.3143, 7.2363, 7.3592, 7.4813, 7.4039,\n 7.5251, 7.6456, 7.5687, 7.6883, 7.6120, 7.5364, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.8988, 7.8253, 7.7524, 7.6800, 7.7949,\n 7.7230, 7.8372, 7.7658, 7.8793, 7.9921, 7.9211, 7.8507, 7.9628, 8.0742,\n 8.0042, 8.1150, 8.0455, 7.9764, 8.0865, 8.1960, 8.1273, 8.2362, 8.3446,\n 8.2762, 8.3840, 8.3161, 8.2486, 8.3557, 8.2887, 8.3952, 8.3286, 8.4345,\n 8.5399, 8.4736, 8.5785, 8.5126, 8.4471, 8.3820, 8.4862, 8.4215, 8.5252,\n 8.4608, 8.5640, 8.6667, 8.6026, 8.5390, 8.6411, 8.5778, 8.6794, 8.6164,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nequals the original and in some ways even betters it \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.8240, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.9711, 1.8791, 1.7889, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.4580, 1.6186, 1.5556, 1.4931, 1.6514, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.9298, 1.8682, 1.8071, 1.7465, 1.8974,\n 1.8370, 1.7772, 1.7179, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.5808, 0.5345,\n 0.6662, 0.7971, 0.7506, 0.8805, 0.8340, 0.7878, 0.9165, 1.0445,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.9870, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.3644, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 2.9439, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.5955, 10.7074, 10.6145,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.7559, 10.6700, 10.5848,\n 10.6920, 10.6076, 10.5238, 10.4407, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 12.9087, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nif anything , see it for karen black , who camps up a storm as a fringe feminist conspiracy theorist named dirty dick . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.1167, -2.1664, -2.2156,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.4449, -2.4875, -2.5298,\n -2.3619, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.7341, -2.7721, -2.8098,\n -2.6605, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 6.5997,\n 6.8127, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 8.0546, 8.2281, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.3557, 8.5206, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 9.8150,\n 9.6676, 9.8072, 9.6632, 9.8020, 9.6612, 9.7989, 9.6612, 9.7980,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 10.9589, 11.0818, 10.9560, 11.0782,\n 11.1994, 11.3196, 11.1967, 11.3163, 11.4349, 11.5525, 11.4323, 11.5494,\n 11.6656, 11.5476, 11.6632, 11.7779, 11.8918, 11.7762, 11.8896, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.1107, 12.2207, 12.3299, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 12.8586, 12.7542, 12.8582, 12.7550, 12.8586, 12.7567, 12.8598,\n 12.7590, 12.8618, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.6626, 13.7599, 13.8567, 13.7599, 13.8564,\n 13.7606, 13.8567, 13.9524, 14.0475, 13.9530, 14.0479, 14.1422, 14.2361,\n 14.1428, 14.2364, 14.3295, 14.2373, 14.3301, 14.4225, 14.5144, 14.4234,\n 14.5150, 14.4248, 14.5161, 14.6070, 14.6976, 14.7877, 14.6986, 14.7885,\n 14.8779, 14.7898, 14.8789, 14.9677, 15.0560, 15.1440, 15.0570, 15.1448,\n 15.2321, 15.3191, 15.4057, 15.3198, 15.2345, 15.3210, 15.2364, 15.3226,\n 15.2387, 15.3247, 15.2414, 15.3272, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na smile on your face \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.1057, -2.1470, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.0751, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -1.8728,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.0179, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 9.9187, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.3445, 10.2592, 10.1745, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.1429, 10.2509, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.5109, 10.4330, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.7671, 10.8673, 10.9669,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.5489, 11.6441, 11.7389, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncomes from the brave , uninhibited performances \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.7614, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -2.9943, -3.0292, -3.0639, -3.0984, -2.9611, -2.9957, -3.0302, -2.8943,\n -2.9289, -2.9633, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.1674, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.5615, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.8025, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.4935, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 11.8503, 11.7672, 11.6847,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.6597, 12.5820, 12.5049, 12.4283,\n 12.5221, 12.6153, 12.5394, 12.6323, 12.7248, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.1063, 13.0316, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nexcruciatingly unfunny and pitifully unromantic \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.4004,\n -2.4441, -2.4874, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.6131, -2.6533, -2.6933, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.8098,\n -2.8472, -2.8845, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -3.1013, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -2.9935, -3.0288, -3.0638, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.1667,\n -3.2000, -3.2332, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "56.1%", + "z-score": "10.1", + "p value": "2.95e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.1326, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.7006, 6.5993, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.0211, 6.9282, 6.8364, 6.9743, 6.8834, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.5967, 7.5076, 7.6376,\n 7.5494, 7.4622, 7.3758, 7.5048, 7.4193, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.8406, 7.9649, 8.0882, 8.0042, 8.1266, 8.0434, 7.9608,\n 7.8791, 8.0006, 7.9196, 7.8393, 7.9600, 8.0798, 8.1989, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.8800, 8.9912,\n 8.9151, 9.0257, 8.9502, 8.8752, 8.8008, 8.9107, 8.8369, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.2276, 9.3328, 9.2619, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.6356, 9.7367, 9.6684, 9.6005, 9.7011, 9.8012, 9.9008, 10.0000,\n 10.0987, 10.1970, 10.1295, 10.2273, 10.1602, 10.0935])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nenriched by an imaginatively mixed cast of antic spirits \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 8.9178, 9.0370, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.5489, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwhich half of dragonfly is worse : the part where nothing 's happening , or the part where something 's happening \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.7638, 1.6908, 1.8677, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 2.0247, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.7150, 1.8773, 2.0381,\n 1.9720, 2.1309, 2.2884, 2.2222, 2.1567, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.0548, 1.9920, 2.1442, 2.0817, 2.2323, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 1.8665, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.9825, 1.9242, 2.0682, 2.0101, 1.9524, 1.8953, 2.0373, 2.1783,\n 2.1210, 2.0642, 2.0078, 2.1470, 2.2852, 2.2287, 2.3657, 2.3094,\n 2.4453, 2.3891, 2.3333, 2.2780, 2.4122, 2.3570, 2.4902, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.3500, 2.2966, 2.2436, 2.3735,\n 2.5026, 2.4495, 2.5776, 2.7050, 2.6519, 2.5990, 2.5466, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.2377, 2.1886, 2.3110, 2.2620, 2.2133,\n 2.1648, 2.2860, 2.2377, 2.3580, 2.3098, 2.2618, 2.2141, 2.3333,\n 2.4520, 2.4042, 2.3567, 2.3094, 2.4269, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.4850, 9.6347, 9.4589, 9.6077, 9.7545, 9.8995, 10.0426, 10.1840,\n 10.3237, 10.4618, 10.5982, 10.7331, 10.8666, 10.9985, 10.8382, 10.6810,\n 10.5269, 10.3758, 10.2275, 10.3621, 10.4952, 10.6270, 10.4834, 10.3423,\n 10.4739, 10.6043, 10.4667, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.4565, 10.5838, 10.4565, 10.3310, 10.4579, 10.5837, 10.4608, 10.3397,\n 10.2202, 10.1024, 9.9863, 10.1124, 10.2375, 10.3615, 10.2476, 10.1352,\n 10.2587, 10.1479, 10.2706, 10.1614, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.4636, 10.3695, 10.4829, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.6534, 10.7635, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 10.9048, 10.8204, 10.9259, 10.8423, 10.7594, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.0004, 10.9220, 10.8443, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.9422, 10.8673, 10.7928,\n 10.8925, 10.8186, 10.9178, 11.0165, 10.9431, 11.0414, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.0194, 10.9480, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.1218, 11.0521, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nin world cinema \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.1390, -1.1825, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -0.9129,\n -0.9555, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.2733, 3.1754,\n 3.3657, 3.5533, 3.7383, 3.9208, 3.8228, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.8000, 3.9736, 3.8819, 3.7916, 3.7025, 3.6148,\n 3.5283, 3.6979, 3.8657, 4.0316, 3.9452, 3.8600, 3.7758, 3.6927,\n 3.6107, 3.7732, 3.9340, 4.0931, 4.0112, 3.9302, 3.8503, 3.7712,\n 3.9276, 4.0825, 4.2359, 4.3879, 4.3086, 4.2303, 4.1528, 4.0762,\n 4.0004, 4.1497, 4.2977, 4.4444, 4.3687, 4.2938, 4.2196, 4.3644,\n 4.2907, 4.4341, 4.5762, 4.7173, 4.6437, 4.5708, 4.4987, 4.4272,\n 4.3564, 4.4953, 4.6332, 4.7700, 4.6992, 4.6291, 4.5596, 4.4907,\n 4.4225, 4.5573, 4.6912, 4.8242, 4.7559, 4.6883, 4.6212, 4.5547,\n 4.6860, 4.8164, 4.9460, 5.0747, 5.0080, 4.9419, 4.8763, 4.8113,\n 4.9385, 5.0649, 5.1905, 5.3153, 5.2501, 5.1854, 5.1213, 5.0576,\n 4.9943, 5.1177, 5.2402, 5.3621, 5.2989, 5.2362, 5.1739, 5.1121,\n 5.0507, 5.1711, 5.2909, 5.4100, 5.3487, 5.2877, 5.2272, 5.1671,\n 5.1073, 5.2251, 5.3423, 5.4588, 5.3991, 5.3398, 5.2809, 5.2223,\n 5.3377, 5.4526, 5.5668, 5.6804, 5.6217, 5.5635, 5.5056, 5.4480,\n 5.3909, 5.5033, 5.6153, 5.7266, 5.6695, 5.6126, 5.5562, 5.5000,\n 5.6104, 5.7203, 5.6643, 5.7735, 5.8822, 5.8263, 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nvery good viewing alternative \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.6058, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.7700, 3.9595, 4.1461, 4.3301,\n 4.2251, 4.1219, 4.3026, 4.2008, 4.1008, 4.0024, 4.1797, 4.0825,\n 3.9869, 4.1612, 4.3333, 4.5034, 4.4083, 4.5760, 4.7419, 4.6476,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.4482, 5.3541, 5.5090, 5.6622,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.1664, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.1111, 7.2443, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.2107, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.4057, 8.3231, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.0910, 10.0131, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.1846, 10.2872, 10.3893, 10.4909, 10.4170, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.9480, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe plot is nothing but boilerplate clich\u00e9s from start to finish , \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.2678, -2.3170, -2.3658, -2.4140, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.5403, -2.5852, -2.6296, -2.6737, -2.4910,\n -2.5355, -2.5796, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.6961, -2.7369, -2.7775, -2.8177, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -2.9277, -2.9659, -3.0039, -3.0417, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -3.0706, -3.1071, -3.1433, -2.9950, -3.0315, -3.0677, -3.1038,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.2420, -3.2768, -3.3113, -3.3457, -3.2043, -3.2389, -3.2733, -3.3075,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe action is stilted \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.2907, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.3073, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.8301, 3.0792, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.9962, 4.8712, 5.0602, 4.9377,\n 4.8177, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.2486, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.7423, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.6514, 11.5771,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\non all cylinders \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.2445, -1.1127, -0.9816, -1.0215,\n -0.8914, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321, 2.1004, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712, 3.5796, 3.3968, 3.2222,\n 3.4915, 3.7524, 3.5839, 3.8367, 3.6742, 3.5176, 3.7626, 3.6108, 3.4641,\n 3.7017, 3.9337, 3.7905, 3.6515, 3.5165, 3.3853, 3.6098, 3.4816, 3.7009,\n 3.5753, 3.4528, 3.3333, 3.5466, 3.7559, 3.9614, 3.8431, 3.7273, 3.6141,\n 3.8146, 4.0119, 3.9001, 4.0937, 3.9837, 3.8759, 3.7700, 3.6662, 3.5642,\n 3.4641, 3.3657, 3.5533, 3.7383, 3.6407, 3.8228, 3.7264, 3.6315, 3.5382,\n 3.7166, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730, 3.7849,\n 3.9530, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775, 4.8347, 4.7469,\n 4.6603, 4.5747, 4.7296, 4.8830, 4.7980, 4.9497, 4.8655, 4.7823, 4.9322,\n 4.8497, 4.7682, 4.9163, 5.0630, 4.9820, 4.9019, 4.8226, 4.7442, 4.8889,\n 4.8111, 4.9543, 4.8772, 4.8008, 4.7252, 4.8666, 5.0070, 5.1461, 5.0707,\n 4.9960, 4.9221, 5.0596, 5.1962, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406,\n 4.9695, 4.8990, 4.8291, 4.7599, 4.8930, 4.8242, 4.7559, 4.6883, 4.8200,\n 4.9507, 5.0806, 5.2096, 5.3378, 5.2699, 5.3970, 5.5233, 5.6488, 5.5811,\n 5.7056, 5.8294, 5.7619, 5.8848, 6.0069, 5.9397, 6.0609, 6.1815, 6.1146,\n 6.0481, 6.1677, 6.2866, 6.4048, 6.3385, 6.4559, 6.3901, 6.5067, 6.4413,\n 6.5571, 6.6724, 6.6072, 6.5424, 6.6568, 6.7706, 6.7061, 6.8192, 6.7551,\n 6.8675, 6.9793, 7.0905, 7.2012, 7.3113, 7.4208, 7.5297, 7.6381, 7.5738,\n 7.6816, 7.7889, 7.7249, 7.8316, 7.9377, 8.0433, 8.1485, 8.0847, 8.1892,\n 8.2933, 8.3969, 8.5000, 8.4364, 8.5390, 8.4757, 8.4128, 8.5148, 8.4523,\n 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwill find little of interest in this film , which is often preachy and poorly acted \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.7140, -1.7566, -1.7990, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.0476, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -2.0369, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.1167, -2.1532, -2.1896, -2.2258, -2.0943, -2.1306, -2.1667,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "162", + "Fraction of T in Greenlist": "81.4%", + "z-score": "18.4", + "p value": "1.02e-75", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 8.8029, 8.9567, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 10.1036,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.4834, 10.3423,\n 10.4739, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.3389, 11.4599, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.6909, 12.5723, 12.6815,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 13.1015, 13.2067, 13.3113,\n 13.4152, 13.5185, 13.4057, 13.5086, 13.6109, 13.4999, 13.6019, 13.7032,\n 13.8039, 13.9040, 14.0036, 14.1025, 14.2009, 14.2988, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.5682, 14.4639, 14.5593, 14.4562,\n 14.5513, 14.6459, 14.7400, 14.8337, 14.9269, 15.0195, 15.1118, 15.2036,\n 15.2949, 15.1946, 15.2857, 15.3764, 15.2774, 15.3678, 15.4578, 15.5473,\n 15.6365, 15.7252, 15.8135, 15.9014, 15.9889, 15.8923, 15.9796, 16.0665,\n 16.1531, 16.2392, 16.3250, 16.4104, 16.4954, 16.4009, 16.4857, 16.5702,\n 16.6543, 16.7381, 16.6450, 16.7286, 16.8118, 16.8948, 16.8028, 16.8855,\n 16.9680, 17.0500, 17.1318, 17.2133, 17.2944, 17.3752, 17.4557, 17.5359,\n 17.6158, 17.6954, 17.6058, 17.6852, 17.7643, 17.8432, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.1453, 18.2226, 18.2996, 18.3763])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nby far the worst movie of the year \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.0328, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.2147, 9.1101, 9.0067, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.4312, 10.5427, 10.4524, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.0307, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.7327, 13.6546, 13.5771, 13.6667,\n 13.7559, 13.6789, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsit through , \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.7543, 0.6928, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.9909, 0.9316, 0.8729,\n 1.0319, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 1.0593, 1.0050, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "67.9%", + "z-score": "13.6", + "p value": "3.82e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.6313, 13.5526])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nmore than another `` best man '' clone by weaving a theme throughout this funny film \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.5547, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.7336, 0.6885, 0.6437, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 5.8966, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.8058, 9.6786, 9.5534,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.3228,\n 11.2124, 11.1033, 11.2187, 11.3333, 11.2259, 11.3399, 11.4531, 11.5655,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 11.7992, 11.9083, 12.0167,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.3419, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.3586, 12.4625, 12.5657, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.2834, 13.3810, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.7730, 13.6826, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.6225,\n 14.7113, 14.6267, 14.7152, 14.8034, 14.7195, 14.6362, 14.7242, 14.8119,\n 14.7293, 14.8167, 14.9037, 14.9903, 15.0766, 15.1625, 15.2481, 15.3333,\n 15.4182, 15.5028, 15.4217, 15.5060, 15.4254, 15.5095, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's about issues most adults have to face in marriage and i think that 's what i liked about it -- the real issues tucked between the silly and crude storyline \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.0685, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.0429, -2.8446, -2.8893, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -2.9013, -2.9439,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.5280, -3.5645,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.6210, -3.6566,\n -3.6919, -3.7270, -3.7619, -3.7966, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.6289, -3.6635, -3.5131, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.7196, -3.5725, -3.6067, -3.6407, -3.4953, -3.5295, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.8739, -3.7376, -3.7697, -3.8016, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570, 2.6558, 2.4910, 2.7778,\n 3.0551, 2.8947, 3.1623, 3.0072, 3.2660, 3.1156, 2.9704, 2.8301, 2.6943,\n 2.9424, 2.8098, 3.0509, 3.2863, 3.5165, 3.7417, 3.9620, 4.1779, 4.3894,\n 4.5968, 4.8003, 5.0000, 5.1962, 5.3889, 5.2549, 5.1241, 5.3134, 5.4997,\n 5.3716, 5.5549, 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009,\n 5.4848, 5.3709, 5.2590, 5.1490, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.1978, 5.0990, 5.2615, 5.4222, 5.3245,\n 5.4832, 5.3867, 5.2915, 5.1977, 5.3541, 5.5090, 5.4160, 5.3243, 5.4772,\n 5.6286, 5.7785, 5.9270, 5.8358, 5.7458, 5.6569, 5.5690, 5.7155, 5.8606,\n 5.7735, 5.6874, 5.6023, 5.7457, 5.6614, 5.8034, 5.9442, 5.8605, 6.0000,\n 5.9171, 6.0553, 5.9732, 5.8919, 5.8114, 5.7318, 5.8684, 5.7894, 5.9247,\n 6.0590, 6.1923, 6.3246, 6.4558, 6.5861, 6.7155, 6.8439, 6.9714, 7.0980,\n 7.2236, 7.3485, 7.2691, 7.1904, 7.3143, 7.4373, 7.3592, 7.4813, 7.4039,\n 7.3271, 7.4483, 7.3721, 7.4924, 7.4168, 7.5364, 7.4613, 7.3869, 7.3131,\n 7.2399, 7.3584, 7.2857, 7.4034, 7.3312, 7.4482, 7.3765, 7.4927, 7.6082,\n 7.5369, 7.6517, 7.7658, 7.8793, 7.9921, 8.1043, 8.2158, 8.1448, 8.0742,\n 8.1851, 8.1150, 8.2252, 8.1556, 8.2652, 8.1960, 8.1273, 8.0591, 7.9913,\n 8.1001, 8.2084, 8.3161, 8.4232, 8.5298, 8.6359, 8.7414, 8.8464, 8.7788,\n 8.8832, 8.9872, 9.0906, 9.0233, 9.1262, 9.2287, 9.1617, 9.0952, 9.0292,\n 9.1310, 9.0653, 9.1667, 9.1013, 9.2022, 9.3026, 9.2376, 9.3375, 9.2729,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nheroes \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.0605, -1.1111, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.4535, -1.4967, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.6418, 8.5337, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.4390, 13.3537, 13.4477, 13.3631,\n 13.2791, 13.3728, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.5985, 13.5179, 13.6091, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.7215, 13.6429, 13.5647, 13.6546, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\noblivious to the existence of this film \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.0303, -2.0739, -2.1172, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.3651, -2.4037, -2.4421, -2.4803, -2.3351, -2.3735,\n -2.4116, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.6047, -2.6393, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.7555, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.4868, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 8.9113,\n 9.0267, 8.9448, 9.0595, 8.9783, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.5381, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.7380, 9.6635, 9.7688, 9.6948, 9.6214,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.9642, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.0848, 10.1855, 10.1149, 10.2151,\n 10.3148, 10.2447, 10.3439, 10.4427, 10.5410, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.5286, 10.4603, 10.5573, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsharply \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.2517, -2.1131, -2.1509, -2.1884, -2.0512, -2.0889,\n -2.1264, -1.9906, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 8.9935, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.4042, 10.5243, 10.4169,\n 10.5363, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.7222, 11.8299, 11.7320, 11.8392,\n 11.9457, 11.8491, 11.7533, 11.8594, 11.9650, 11.8704, 11.9754, 12.0798,\n 11.9863, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.6990, 13.6117, 13.5250, 13.6188, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.0248, 13.9427, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.4046, 14.4923, 14.4126, 14.5000,\n 14.4208, 14.5080, 14.4294, 14.5162, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe entire point of a shaggy dog story , of course , is that it goes nowhere , and this is classic nowheresville in every sense . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.4872, -2.5247, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.3912, -2.4283, -2.2892, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.4", + "p value": "5.07e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.5347, 8.6817, 8.5491, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.4560, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 10.8498, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.4857,\n 11.5917, 11.5005, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.9024, 12.0032, 11.9181, 11.8336, 11.9341, 11.8503, 11.7672, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.6785, 12.5979, 12.6930, 12.6130, 12.5336,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.2288, 13.3196, 13.2429, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsometimes dry \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, 0.0452, 0.0000, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, -0.0411, -0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.7543, 6.9570, 6.7402, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.0990, 9.2418, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 9.9351, 10.0673, 10.1982, 10.0698,\n 10.1999, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.1990, 11.3161, 11.4323, 11.3189, 11.4345, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.6683, 11.7803, 11.8915, 12.0020,\n 11.8944, 12.0044, 12.1136, 12.2221, 12.1164, 12.2244, 12.1200, 12.2275,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.7590, 12.8618, 12.7622, 12.6635, 12.7660, 12.8679, 12.9692, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.3710, 13.4691, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.5714, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.7694, 13.8642, 13.9585, 14.0524, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.1510, 14.2433, 14.1543, 14.2464, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.6155, 14.7049, 14.7939, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.3226,\n 15.4085, 15.3247, 15.4103, 15.4956, 15.4126, 15.4976, 15.5823, 15.6667,\n 15.7507, 15.8344, 15.9178, 16.0009, 15.9191, 16.0019, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nas they come , already having been recycled more times than i 'd care to count \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.1562, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.0998, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, -0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.2689, 0.2234, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.2158, 0.3443, 0.3004, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.4145, 0.3721, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.0677, 13.1707, 13.2730, 13.3747, 13.4758, 13.5764, 13.6763,\n 13.5724, 13.6720, 13.7710, 13.8695, 13.9675, 14.0649, 14.1618, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.4433, 14.5379, 14.6319, 14.5324, 14.6262,\n 14.7195, 14.8124, 14.9048, 14.9967, 15.0882, 14.9907, 15.0819, 15.1727,\n 15.2631, 15.3530, 15.4425, 15.5316, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.9632, 15.8694, 15.9561, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.2917, 16.3764, 16.4607, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.7039, 16.7866, 16.8690, 16.9511, 17.0328, 17.1143, 17.1954,\n 17.1064, 17.1873, 17.2680, 17.3483, 17.4284, 17.5081, 17.5875, 17.5000,\n 17.5793, 17.6583, 17.7370, 17.8154, 17.8935, 17.9714, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncovers this territory with wit and originality , suggesting that with his fourth feature \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -1.7778, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.4283, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -0.9272, -0.5164, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.9055, 2.7852, 2.6681, 2.8943, 2.7791, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 3.8146, 4.0119,\n 3.9001, 4.0937, 3.9837, 4.1740, 4.0657, 3.9595, 3.8552, 3.7528,\n 3.9386, 4.1219, 4.0205, 4.2008, 4.1008, 4.2784, 4.1797, 4.0825,\n 4.2571, 4.4296, 4.3333, 4.5034, 4.4083, 4.5760, 4.7419, 4.6476,\n 4.8113, 4.7181, 4.8797, 5.0395, 5.1977, 5.3541, 5.2614, 5.4160,\n 5.3243, 5.2338, 5.3865, 5.5377, 5.6875, 5.8358, 5.7458, 5.8926,\n 5.8035, 5.7155, 5.6285, 5.7735, 5.6874, 5.8310, 5.9732, 6.1143,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.3595, 6.2755, 6.4116, 6.5465,\n 6.6804, 6.8133, 6.7298, 6.8615, 6.7788, 6.6968, 6.8274, 6.9570,\n 6.8757, 7.0043, 7.1319, 7.2587, 7.3845, 7.5094, 7.6335, 7.5526,\n 7.4724, 7.5955, 7.7178, 7.6383, 7.7597, 7.6808, 7.6026, 7.7232,\n 7.8429, 7.9619, 8.0801, 8.0024, 8.1198, 8.2365, 8.3525, 8.2754,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 10.0668, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.0547, 9.9837, 10.0848, 10.0143, 9.9442, 9.8746,\n 9.9752, 9.9060, 10.0061, 10.1058, 10.2050, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.9829, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na $ 40 million version of a game \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -3.1604, -3.1977, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.4780, -3.5131, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.7196, -3.7534, -3.7870, -3.8205, -3.8538, -3.8869, -3.9198,\n -3.7750, -3.8081, -3.8411, -3.8740, -3.9067, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.9910, -4.0228, -4.0545, -4.0860,\n -4.1174, -4.1487, -4.1798, -4.2108, -4.2416, -4.2723, -4.3029, -4.3333,\n -4.3637, -4.3938, -4.4239, -4.4538, -4.4837, -4.5134, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.2283, 10.3532, 10.4770, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.3399, 11.4531, 11.5655,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.6592, 12.7622, 12.8645, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.3710, 13.4691, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 13.8593, 13.9543, 14.0488,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.4234,\n 14.5150, 14.6062, 14.5161, 14.6070, 14.6976, 14.7877, 14.8773, 14.9666,\n 15.0555, 14.9669, 15.0555, 15.1438, 15.0560, 15.1440, 15.2316, 15.3188,\n 15.4057, 15.4922, 15.5783, 15.4919, 15.5778, 15.6634, 15.5778, 15.6631,\n 15.7481, 15.8327, 15.9170, 16.0009, 16.0845, 16.0002, 16.0836, 16.1667,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ngorgeous and deceptively minimalist \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -0.9685, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -1.8935, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.1801, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.0812, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.9755, 13.0688, 12.9891, 13.0821, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncross swords with the best of them and \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 5.9346, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 5.9386,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 10.0504, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.2348, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.8204, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.1807, 11.2816, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nas a fringe feminist conspiracy theorist \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.1822, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.1822,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.3238, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.2111, -2.2528, -2.2943, -2.3354, -2.1783,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.0613, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -1.8728,\n -1.9101, -1.7780, -1.8155, -1.8527, -1.8898, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 6.7489, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 9.8015, 9.9351, 10.0673, 10.1982, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.3310, 10.4579, 10.5837, 10.4608, 10.3397,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.9669, 11.0851, 10.9727, 10.8616, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.1111, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.4675, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 11.9457, 11.8491, 11.7533, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.3277, 12.2381, 12.3391, 12.2503, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.4430, 12.3603, 12.2782, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.4547, 12.3764, 12.2987, 12.2214, 12.1447, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.6323, 12.5568, 12.6494, 12.7416, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.7928, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nproves once again he has n't lost his touch , bringing off a superb performance in an admittedly middling film . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.6894, 10.8012, 10.7074, 10.6145,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 11.8427, 11.9455, 12.0476, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.1893, 12.1036, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.4758, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.6796, 13.5985, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.4126, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.5162, 14.6027, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ndisappointments \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.4313, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.3943, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.5191, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.6444, 1.7693, 1.7213, 1.8453, 1.9686, 2.0913, 2.0430,\n 1.9950, 2.1167, 2.0688, 2.0212, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "65.2%", + "z-score": "13", + "p value": "3.27e-39", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.5435, 8.4285, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe horrors \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.2503, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.2949, 0.4407, 0.5855, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.9492, 1.0879, 1.0371, 1.1746, 1.3112,\n 1.4470, 1.5818, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.3933, 1.3443, 1.2956, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.9333, 9.7989, 9.9333, 10.0664, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.5859,\n 10.7098, 10.8327, 10.9546, 11.0755, 11.1954, 11.0761, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.5492,\n 11.6631, 11.7762, 11.8885, 11.7778, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.6772, 11.7881, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.3468, 12.2474,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.3809, 12.4834, 12.3895, 12.4915, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.3361, 13.2499, 13.3447, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.7533, 14.6738, 14.7601, 14.6812, 14.7673, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na muddle splashed with bloody beauty as vivid as any scorsese has ever given us . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.3428, -0.1952, -0.2431, -0.2907, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.1873,\n 0.3267, 0.4652, 0.4174, 0.5547, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.1862, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 10.9107, 10.8224, 10.9301, 11.0371, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.9341, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.1646, 12.0824, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.2288, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.1966, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nmany pointless \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 3.9614, 3.8431, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.9747, 5.1490, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 5.9932, 5.8936, 5.7955, 5.6986, 5.8522, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.7414, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.4193, 7.3346, 7.2508, 7.1678,\n 7.0857, 7.2134, 7.3402, 7.4661, 7.5910, 7.7152, 7.6335, 7.7567,\n 7.8791, 8.0006, 7.9196, 7.8393, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.0402, 8.1585, 8.0801, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.6190, 8.5424, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.6535, 8.5792, 8.6903, 8.8008, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.1242, 9.2311, 9.3374, 9.2651,\n 9.1932, 9.2990, 9.2276, 9.1567, 9.0863, 9.0164, 8.9469, 8.8780,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.3982, 9.3295, 9.4320, 9.5341,\n 9.6356, 9.5673, 9.4995, 9.4321, 9.3651, 9.4661, 9.5666, 9.5000,\n 9.6000, 9.5338, 9.6334, 9.7325, 9.8311, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na beautifully \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.6%", + "z-score": "11.9", + "p value": "6e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.1152, 7.0211, 7.1591, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 8.8631, 8.7773,\n 8.8958, 9.0134, 8.9285, 8.8443, 8.7610, 8.6783, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.6814, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncontrived , well-worn situations \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.3%", + "z-score": "1.72", + "p value": "0.0424", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.3651, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 1.0915, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.9372, 0.8847, 1.0284, 0.9759, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.2946, 1.4335, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.6859,\n 1.6337, 1.5818, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.0215,\n 1.1461, 1.2700, 1.3933, 1.3474, 1.4699, 1.4241, 1.5457, 1.5000,\n 1.6208, 1.5752, 1.5298, 1.6496, 1.7688, 1.7233])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.1470, 6.2883, 6.4283, 6.5672,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.6471, 6.7788, 6.6968, 6.6157, 6.7462,\n 6.8757, 7.0043, 6.9237, 7.0513, 7.1779, 7.0980, 7.2236, 7.3485,\n 7.4724, 7.5955, 7.5161, 7.6383, 7.7597, 7.6808, 7.8014, 7.9212,\n 7.8429, 7.9619, 8.0801, 8.1976, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.5424, 8.4664, 8.5796, 8.5041, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.8008, 8.7270, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.7278, 8.6560, 8.7646, 8.6933, 8.6226, 8.7305,\n 8.8379, 8.9447, 8.8744, 8.9806, 8.9107, 9.0164, 9.1215, 9.2261,\n 9.3302, 9.2607, 9.3642, 9.4673, 9.5698, 9.6719, 9.6028, 9.5341,\n 9.6356, 9.7367, 9.8373, 9.9374, 9.8691, 9.8012, 9.7337, 9.8333,\n 9.7663, 9.8654, 9.9641, 9.8974, 9.8311, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na doa \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 3.8146, 4.0119,\n 3.9001, 4.0937, 3.9837, 4.1740, 4.3614, 4.5461, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.7242, 5.6220, 5.7812, 5.9386,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.8849, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.2532, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.9630, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.1481, 8.0598, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 11.0661, 10.9917, 10.9178, 10.8444, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.6179, 11.5470, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\npoor ben bratt could n't find stardom if mapquest emailed him point-to-point driving directions . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.0186, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.3496, 10.2554, 10.3695, 10.4829, 10.3898, 10.2975, 10.2062,\n 10.1157, 10.0261, 9.9373, 9.8494, 9.9625, 10.0748, 9.9878, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.1692, 10.0881, 10.0076, 9.9278,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.5388, 10.6404, 10.5654, 10.6665, 10.7671, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.6179, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nto be as subtle and touching as the son 's room \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.9285, 9.0453, 8.9612, 9.0773, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.7072, 9.6322, 9.7380, 9.8433, 9.7688, 9.8736, 9.9778,\n 9.9038, 10.0074, 9.9340, 9.8611, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.2273, 10.1558, 10.0848, 10.1855, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.3439, 10.4427, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.4312, 10.5286, 10.6256, 10.5573, 10.6538, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nstarts with a legend \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.1644, 0.0000,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.7457, 2.0370, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.8301, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.3630, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.3616, 11.4638, 11.5655, 11.4829, 11.5841, 11.6847,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 11.9197, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.3764, 12.4713, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.8007, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 12.9410, 13.0316, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfar less sophisticated and \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 1.1094, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.9981, 0.9520, 0.9062, 1.0328, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.2121, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.9469, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.2740, 10.3908, 10.2923, 10.1948, 10.3110, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.3728, 13.4661, 13.5589, 13.4758, 13.5683, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 13.8804, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nrich veins of funny stuff in this movie \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.2222, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.9673, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -1.9245,\n -1.9658, -2.0068, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.5423, -1.5828, -1.4427, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.5159, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "157", + "Fraction of T in Greenlist": "78.9%", + "z-score": "17.6", + "p value": "2.59e-69", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 8.8029, 8.9567, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 10.1036,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.4834, 10.3423,\n 10.4739, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.3389, 11.4599, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.6909, 12.5723, 12.6815,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 13.1015, 13.2067, 13.3113,\n 13.4152, 13.5185, 13.4057, 13.5086, 13.6109, 13.4999, 13.6019, 13.7032,\n 13.8039, 13.9040, 14.0036, 14.1025, 14.2009, 14.2988, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.5682, 14.4639, 14.5593, 14.4562,\n 14.5513, 14.6459, 14.7400, 14.8337, 14.9269, 15.0195, 15.1118, 15.2036,\n 15.2949, 15.1946, 15.2857, 15.3764, 15.2774, 15.3678, 15.4578, 15.5473,\n 15.6365, 15.7252, 15.8135, 15.9014, 15.9889, 15.8923, 15.9796, 16.0665,\n 16.1531, 16.2392, 16.3250, 16.4104, 16.4954, 16.4009, 16.4857, 16.3920,\n 16.4767, 16.5610, 16.4684, 16.5525, 16.6363, 16.7197, 16.8028, 16.8855,\n 16.7944, 16.8770, 16.9592, 17.0411, 16.9511, 17.0328, 17.1143, 17.1954,\n 17.2762, 17.1873, 17.0991, 17.1799, 17.2604, 17.3406, 17.4204, 17.5000,\n 17.5793, 17.4925, 17.5716, 17.4855, 17.5644, 17.6431, 17.5578])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nno apparent joy \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "8.5%", + "z-score": "-5.36", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.7863, -2.8368, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.3619, -3.4044, -3.4466,\n -3.2435, -3.2863, -3.3288, -3.3708, -3.4125, -3.4538, -3.4949, -3.5355,\n -3.5759, -3.6159, -3.6556, -3.6950, -3.7342, -3.5443, -3.5839, -3.6233,\n -3.6623, -3.7011, -3.7396, -3.7778, -3.8157, -3.8534, -3.8908, -3.9279,\n -3.9648, -3.7852, -3.8225, -3.8596, -3.8965, -3.9331, -3.9694, -4.0056,\n -4.0415, -4.0771, -4.1126, -4.1478, -4.1828, -4.2176, -4.0473, -4.0825,\n -4.1175, -4.1522, -4.1868, -4.2212, -4.2553, -4.2893, -4.3231, -4.3566,\n -4.3900, -4.4233, -4.2604, -4.2940, -4.3273, -4.3605, -4.3935, -4.4264,\n -4.4590, -4.4915, -4.5238, -4.5560, -4.5879, -4.6198, -4.6514, -4.4956,\n -4.5276, -4.5594, -4.5910, -4.6225, -4.6538, -4.6850, -4.7160, -4.7469,\n -4.7777, -4.8083, -4.8387, -4.6887, -4.7194, -4.7500, -4.7804, -4.8107,\n -4.8409, -4.8709, -4.9008, -4.9305, -4.9601, -4.9896, -5.0190, -5.0483,\n -4.9038, -4.9333, -4.9626, -4.9918, -5.0210, -5.0499, -5.0788, -5.1075,\n -5.1362, -5.1647, -5.1931, -5.2213, -5.0815, -5.1100, -5.1384, -5.1667,\n -5.1948, -5.2229, -5.2508, -5.2786, -5.3064, -5.3340, -5.3615])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.4207, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.8297, 3.7009, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.4185, 9.3320, 9.2463, 9.1615, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 11.9380, 12.0327, 12.1270, 12.0529, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.0209, 12.1141, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nshot on ugly digital video \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.4938, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.3333,\n -1.3714, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 7.9455, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.2222, 8.1291, 8.2572, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.1481, 8.2733, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.7908, 9.8995, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 9.8590,\n 9.7828, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.3154, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.2273, 10.3280, 10.4281, 10.3566, 10.4563, 10.3853,\n 10.3148, 10.2447, 10.1750, 10.1058, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.4312, 10.3628, 10.2949, 10.2273, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n... a sour little movie at its core ; an exploration of the emptiness that underlay the relentless gaiety of the 1920 's ... the film 's ending has a `` what was it all for ? '' \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.3166, 1.2611, 1.2060, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.5505, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.6127, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 3.1156, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.2900, 7.2001, 7.3333, 7.4655, 7.5967, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.5912, 7.7192, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.7155, 7.8406, 7.9649, 7.8808, 7.7976, 7.9209, 7.8384, 7.7567,\n 7.8791, 8.0006, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.3212, 9.4299, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.4513, 9.5577, 9.4837, 9.4103, 9.5161, 9.6214,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.2273, 10.3280, 10.2565, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthough ford and neeson capably hold our interest , but its just not a thrilling movie \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.5426, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.8340, 0.7878, 0.9165, 1.0445,\n 0.9981, 1.1251, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.9897, 0.9461, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.1187, 6.9378,\n 6.7625, 6.9488, 7.1317, 6.9631, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.1756, 7.0219, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 7.9931, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.3425, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.2351, 9.1380, 9.0419, 8.9469,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.5938,\n 9.5057, 9.4185, 9.3320, 9.2463, 9.3617, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.1429, 10.0611, 10.1692, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.7175, 10.6397, 10.5625, 10.6650, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.0418, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.1886, 11.2864, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nis pretty damned funny . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.5377, -1.5752, -1.6125, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.0889, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.6765, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.6914, 2.5981,\n 2.5064, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 3.1052, 3.0237, 2.9433, 3.1129, 3.2806, 3.4466,\n 3.3659, 3.2863, 3.2077, 3.1300, 3.2928, 3.2157, 3.1394, 3.2998,\n 3.4586, 3.6159, 3.5396, 3.4641, 3.3895, 3.3156, 3.4702, 3.3968,\n 3.3243, 3.4768, 3.6279, 3.7778, 3.7051, 3.6332, 3.5620, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.8002, 3.7330, 3.6664, 3.8061, 3.9448, 4.0825,\n 4.0158, 3.9497, 3.8841, 3.8191, 3.9549, 3.8903, 3.8262, 3.9606,\n 4.0941, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.1210,\n 4.2475, 4.1872, 4.1273, 4.2527, 4.3774, 4.5013, 4.4413, 4.3818,\n 4.3226, 4.2639, 4.3865, 4.3280, 4.2699, 4.3915, 4.5123, 4.6325,\n 4.5744, 4.5166, 4.4593, 4.4023, 4.5212, 4.4644, 4.4080, 4.5260,\n 4.6434, 4.7602, 4.7037, 4.6476, 4.5918, 4.5364, 4.6520, 4.5968,\n 4.5419, 4.6567, 4.7709, 4.8845, 4.8295, 4.7749, 4.7206, 4.6667,\n 4.7792, 4.7255, 4.6720, 4.7838, 4.8950, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwe never feel anything for these characters \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, 0.0558, 0.0000, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.0416, -0.0829, 0.0413, 0.0000, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 6.4254, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 6.9830, 7.1832, 6.9570, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.1187, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 7.3208, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.1016, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.9637, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.2142, 11.1253, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.2857, 11.2001, 11.3043, 11.4080, 11.3232, 11.2390,\n 11.3423, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.2987, 12.3935, 12.4880, 12.4109, 12.3342, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.5183, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n's a lousy one at that \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.6025, -2.6485, -2.6941, -2.7393, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.5322, -2.3564, -2.1822,\n -2.2268, -2.0548, -2.0997, -1.9298, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.0761, -1.9245,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.7408,\n -1.6057, -1.4713, -1.5104, -1.3771, -1.4162, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.5220, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.1287, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.7066, 10.6061, 10.5067, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.4625, 12.3754, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.4263, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 13.9113, 14.0000,\n 13.9221, 13.8447, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe corporate circus that is the recording industry in the current climate of mergers and downsizing \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -2.0605, -2.1264, -2.1909,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.9814, -3.0308, -3.0796, -2.8368, -2.8868,\n -2.9361, -2.9848, -3.0330, -2.8006, -2.8497, -2.6222, -2.3982, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -0.9909, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.4286, 0.3797, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.8645, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.9272, 0.8805, 1.0096, 0.9629, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.9870, 0.9415, 0.8963, 1.0215,\n 0.9763, 0.9313, 0.8866, 1.0106, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.1124, 9.9980, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.0368, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.3655, 12.2694, 12.3729, 12.4759,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.7017,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.2166, 13.3128, 13.4086, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 13.9896, 13.9042, 13.9959, 14.0872, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.2870, 14.3762, 14.4651, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 14.9195, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe storylines are woven together skilfully , the magnificent swooping aerial shots are breathtaking , and the overall experience is awesome . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.3455, 12.2627, 12.3603, 12.2782, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.8165, 12.9099, 12.8313, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.6025, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nof the most highly-praised disappointments i \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.0735, -0.7698, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.3344, -1.1693, -1.2173, -1.0541,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.5848, -1.6241, -1.6632, -1.5275, -1.5667,\n -1.6057, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.7349, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.2001, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.5655, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 11.8210, 11.9197, 11.8393, 11.9377,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.5221, 12.6153, 12.5394, 12.6323, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsounds like a cruel deception carried out by men of marginal intelligence , with reactionary ideas about women and a total lack of empathy . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.0474, 6.1996, 6.1012, 6.2517, 6.4008,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.4663, 6.3768, 6.5169, 6.6559, 6.7937,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.2413, 8.3605, 8.4788, 8.3977, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.5088, 8.6241, 8.5456, 8.4678,\n 8.3906, 8.5052, 8.4286, 8.5424, 8.4664, 8.3910, 8.5041, 8.4293,\n 8.5417, 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.0548, 8.9815, 9.0895, 9.1970, 9.3040, 9.2311, 9.1587, 9.0869,\n 9.1932, 9.2990, 9.4042, 9.5089, 9.4375, 9.5416, 9.4707, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 10.0143, 9.9442, 9.8746,\n 9.9752, 9.9060, 10.0061, 10.1058, 10.2050, 10.1363, 10.2350, 10.3333,\n 10.2650, 10.1970, 10.1295, 10.2273, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nseem fresh \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.2697, 9.3810, 9.3017, 9.4124, 9.3338, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.5381, 9.4619, 9.5695, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.5577, 9.6635, 9.5896, 9.6948, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.6850, 9.7886, 9.7167, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.8821, 9.8116, 9.9132, 9.8431, 9.9442, 9.8746,\n 9.9752, 9.9060, 10.0061, 9.9374, 10.0371, 9.9687, 10.0679, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nto the dustbin of history \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nas a director , eastwood is off his game \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.6843, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.5615, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.9601, 9.8634, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.6307, 9.5400, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.0267, 8.9448, 9.0595, 8.9783, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.6963, 8.8095, 8.7323, 8.6556, 8.5796, 8.6921, 8.6166,\n 8.5417, 8.4674, 8.5792, 8.5054, 8.4322, 8.3595, 8.4706, 8.3984,\n 8.3268, 8.2557, 8.3660, 8.2954, 8.2252, 8.1556, 8.2652, 8.1960,\n 8.1273, 8.0591, 8.1679, 8.1001, 8.0328, 7.9659, 8.0741, 8.0076,\n 7.9415, 7.8759, 7.9833, 7.9181, 7.8533, 7.7889, 7.8956, 7.8316,\n 7.7679, 7.7047, 7.8107, 7.7478, 7.6853, 7.6231, 7.7285, 7.6667,\n 7.6052, 7.5441, 7.6488, 7.5880, 7.5276, 7.4676, 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\npays earnest homage to turntablists \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.3%", + "z-score": "1.07", + "p value": "0.143", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.8766, 2.0702, 2.2611, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.3842, 2.5660, 2.4841, 2.4034, 2.5820,\n 2.5019, 2.4228, 2.3448, 2.2678, 2.4423, 2.3658, 2.2902, 2.4618,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.2862, 2.2162, 2.3791, 2.3094, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.6865,\n 1.8370, 1.7772, 1.7179, 1.8665, 1.8074, 1.7488, 1.8956, 2.0412,\n 2.1858, 2.3293, 2.2699, 2.2111, 2.1527, 2.2943, 2.2361, 2.1783,\n 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.8699, 1.8157, 1.9524, 1.8983, 1.8446, 1.7913, 1.9263, 1.8732,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.7970, 1.7454, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.3303, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 10.8186,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.4261, 11.5311, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.7851, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.6201,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.1863, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nweak and \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 2.0738, 1.9599, 2.2011, 2.4371, 2.3238, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.6765, 2.8808, 3.0817, 2.9814, 2.8830, 2.7863, 2.6914, 2.8868,\n 3.0793, 2.9848, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.6979, 3.8657, 3.7796, 3.6947, 3.6109, 3.5282, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.8503, 3.7712,\n 3.6931, 3.6159, 3.5396, 3.6950, 3.8490, 3.7730, 3.6977, 3.6233,\n 3.5496, 3.7011, 3.8512, 3.7778, 3.7051, 3.6332, 3.5620, 3.7097,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.5762, 3.7205, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.8730, 3.8061, 3.7399, 3.6742,\n 3.6091, 3.7471, 3.8841, 3.8191, 3.7547, 3.6908, 3.6274, 3.7626,\n 3.6995, 3.8335, 3.7707, 3.9036, 4.0356, 3.9729, 3.9107, 3.8490,\n 3.7878, 3.7270, 3.6667, 3.6068, 3.7366, 3.6770, 3.6178, 3.5590,\n 3.5007, 3.4428, 3.3853, 3.3282, 3.2715, 3.2152, 3.1593, 3.1038,\n 3.2306, 3.1753, 3.3012, 3.4263, 3.3710, 3.3160, 3.2614, 3.2071,\n 3.1532, 3.0997, 3.0464, 3.1696, 3.1166, 3.0638, 3.0114, 2.9593,\n 2.9076, 2.8561, 2.8050, 2.9263, 3.0469, 3.1669, 3.1156, 3.0645,\n 3.1836, 3.3020, 3.2509, 3.2002, 3.1497, 3.0995, 3.0496, 3.0000,\n 2.9507, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.0456, 7.2168, 7.0711, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.1119, 9.0340, 8.9567, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 9.0601, 8.9851, 8.9107, 8.8369, 8.7636,\n 8.6908, 8.8000, 8.7278, 8.6560, 8.7646, 8.8726, 8.9800, 8.9087,\n 9.0155, 9.1218, 9.0510, 9.1567, 9.2619, 9.3665, 9.2961, 9.4002,\n 9.3302, 9.2607, 9.3642, 9.4673, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 10.0679, 10.0000,\n 9.9325, 10.0312, 10.1295, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nskip this dreck , \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.6803, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 1.0050, 0.9512, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.9272, 1.0565, 1.0096, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.9659, 0.9215, 0.8773, 1.0000,\n 1.1221, 1.0777, 1.1990, 1.3197, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.3208, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.1455, 9.0213, 8.8991, 8.7788,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.1614, 10.0535, 9.9469, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.2846, 11.1860, 11.0883, 10.9917, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.2150, 12.1244,\n 12.0345, 11.9455, 11.8571, 11.9594, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.0795, 12.9952, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 12.9491, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.9700, 13.8904, 13.8113, 13.7327, 13.6546, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ncontains very few laughs and even less surprises \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "7.5%", + "z-score": "-5.69", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.3982, -2.4495,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -3.1730, -3.2157, -3.2579, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.3156, -3.3564, -3.3968,\n -3.4370, -3.4768, -3.5163, -3.5556, -3.5945, -3.6332, -3.6716, -3.7097,\n -3.7476, -3.7852, -3.8225, -3.8596, -3.8965, -3.9331, -3.7577, -3.7947,\n -3.8315, -3.8680, -3.9043, -3.9404, -3.9763, -3.8061, -3.8424, -3.8784,\n -3.9141, -3.9497, -3.9850, -4.0202, -4.0551, -4.0898, -4.1243, -4.1586,\n -4.1927, -4.2267, -4.2604, -4.2940, -4.3273, -4.3605, -4.3935, -4.4264,\n -4.4590, -4.4915, -4.5238, -4.5560, -4.5879, -4.6198, -4.6514, -4.6829,\n -4.7143, -4.7455, -4.7765, -4.8074, -4.8381, -4.8687, -4.8992, -4.9295,\n -4.9597, -4.9897, -5.0196, -5.0494, -5.0790, -5.1085, -5.1378, -5.1671,\n -5.1962, -5.2251, -5.2540, -5.1066, -5.1357, -5.1647, -5.1936, -5.2223,\n -5.2510, -5.2795, -5.3078, -5.3361, -5.3643, -5.3923, -5.4202, -5.4480,\n -5.4757, -5.5033, -5.5308, -5.5582, -5.5855, -5.6126, -5.6397, -5.6667,\n -5.6935, -5.7203, -5.7469, -5.6085, -5.6354, -5.6622, -5.6889])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 2.2011, 2.4371, 2.3238, 2.5538, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.7700, 3.6662, 3.5642, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.2784, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 3.9736, 3.8819, 3.7916, 3.9624, 4.1312,\n 4.0415, 4.2080, 4.3727, 4.2836, 4.1957, 4.3580, 4.5186, 4.4313,\n 4.5899, 4.5035, 4.6603, 4.8154, 4.7296, 4.6448, 4.5611, 4.4783,\n 4.3966, 4.3158, 4.4680, 4.3879, 4.5384, 4.4590, 4.3804, 4.3027,\n 4.2258, 4.3740, 4.5210, 4.6667, 4.8111, 4.9543, 5.0964, 5.0190,\n 4.9424, 5.0829, 5.2223, 5.3606, 5.4977, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.3189, 5.4521, 5.3793, 5.5114,\n 5.6424, 5.7726, 5.7001, 5.6282, 5.5570, 5.4863, 5.4163, 5.3468,\n 5.2779, 5.4062, 5.5336, 5.4650, 5.5915, 5.7171, 5.8419, 5.7735,\n 5.8974, 5.8294, 5.7619, 5.6949, 5.6285, 5.5626, 5.6851, 5.8068,\n 5.7411, 5.8621, 5.9822, 6.1017, 6.0362, 6.1548, 6.0897, 6.0249,\n 5.9607, 5.8969, 5.8336, 5.9510, 6.0678, 6.0047, 5.9420, 5.8797,\n 5.9956, 6.1107, 6.2253, 6.3392, 6.4526, 6.5653, 6.6775, 6.7890,\n 6.7264, 6.8373, 6.9477, 7.0574, 7.1667, 7.2753, 7.3835, 7.3208,\n 7.4283, 7.5353, 7.6418, 7.5794, 7.6853, 7.7907, 7.7285, 7.8333,\n 7.9377, 7.8758, 7.9796, 7.9179, 8.0212, 8.1240, 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfilm to affirm love 's power to help people endure almost unimaginable horror \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.7145, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.2488, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.4094, 9.3088, 9.2094, 9.3333, 9.4563, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.0756, 10.9898, 10.9048, 11.0102, 11.1151, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.6666, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 11.9020, 11.8210, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.0114,\n 12.1076, 12.0302, 11.9534, 12.0493, 12.1447, 12.2397, 12.3342, 12.2581,\n 12.1825, 12.1073, 12.0327, 12.1270, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.7928, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nare an absolute joy \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.5132, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.4073, -3.4428, -3.4780, -3.3282, -3.3637, -3.3989, -3.4340, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.2460, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.3457, -3.3799, -3.4140, -3.2733, -3.3075,\n -3.1679, -3.2023, -3.0639, -3.0984, -2.9611, -2.9957, -3.0302, -2.8943,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -2.8675, -2.9016, -2.9355, -2.8043, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.0401, 6.9294, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.5008, 6.4059, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.9282, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 6.9768, 6.8889, 6.8019, 6.9361, 6.8500, 6.9830,\n 6.8977, 6.8133, 6.7298, 6.8615, 6.9923, 7.1220, 7.2508, 7.1678,\n 7.0857, 7.2134, 7.1319, 7.2587, 7.3845, 7.3037, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.3143, 7.4373, 7.5595, 7.6808, 7.6026, 7.5251,\n 7.4483, 7.3721, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.7574, 9.8611, 9.9642, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.3280, 10.4281, 10.3566, 10.2856, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ngenerates \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 1.1926, 1.4045, 1.6131, 1.8185, 1.7321,\n 1.9335, 1.8477, 1.7634, 1.9604, 1.8766, 2.0702, 2.2611, 2.1773,\n 2.0948, 2.0135, 1.9333, 2.1193, 2.0397, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.9413, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.3126, 2.2393, 2.1669, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.9720, 1.9066, 1.8419, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.9298, 2.0817, 2.2323, 2.3817, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 2.0948, 2.0373, 2.1783,\n 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.6378, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321, 2.1004, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712, 3.5796, 3.3968, 3.2222,\n 3.4915, 3.7524, 3.5839, 3.8367, 3.6742, 3.9196, 4.1586, 4.3916, 4.6188,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.5985, 4.8107, 4.6664, 4.5260, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.5363, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712,\n 5.0602, 5.2463, 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100,\n 5.1962, 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 4.8305, 5.0019, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.4610, 5.6220, 5.7812, 5.6804, 5.5811,\n 5.7382, 5.6401, 5.5435, 5.4482, 5.3541, 5.5090, 5.4160, 5.5691, 5.4772,\n 5.3865, 5.5377, 5.4480, 5.5976, 5.7458, 5.8926, 6.0380, 5.9488, 6.0927,\n 6.2354, 6.1470, 6.0596, 6.2008, 6.1143, 6.0288, 5.9442, 5.8605, 6.0000,\n 5.9171, 6.0553, 5.9732, 6.1101, 6.2459, 6.3807, 6.5144, 6.4327, 6.5653,\n 6.4842, 6.4040, 6.3246, 6.4558, 6.3770, 6.5072, 6.6365, 6.7648, 6.8922,\n 7.0187, 7.1443, 7.2691, 7.3930, 7.3143, 7.4373, 7.5595, 7.4813, 7.4039,\n 7.5251, 7.4483, 7.3721, 7.4924, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.9729, 8.0880, 8.0139, 8.1282, 8.2420, 8.1683,\n 8.0952, 8.2082, 8.1356, 8.0636, 7.9921, 7.9211, 8.0333, 7.9628, 8.0742,\n 8.0042, 7.9347, 8.0455, 7.9764, 8.0865, 8.1960, 8.3050, 8.4133, 8.3446,\n 8.4523, 8.5595, 8.4911, 8.4232, 8.5298, 8.4623, 8.3952, 8.3286, 8.2624,\n 8.3683, 8.3024, 8.4078, 8.3423, 8.2773, 8.3820, 8.4862, 8.5899, 8.6932,\n 8.7959, 8.8982, 8.8333, 8.7689, 8.8706, 8.9718, 9.0726, 9.0085, 8.9446,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n, like life , is n't much fun without the highs and lows \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.9048, 11.0102, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.0125, 10.9317, 11.0346, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.0004, 11.1018, 11.2028, 11.1245, 11.0468,\n 10.9697, 10.8931, 10.8170, 10.9176, 10.8421, 10.7671, 10.8673, 10.7928,\n 10.7189, 10.6455, 10.7451, 10.6722, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nbased on a true and historically significant story \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -1.9044, -1.9604, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.0735, -2.1128, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.3734, -2.4099, -2.4461, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.8641, 7.0133, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.0379, 6.9378, 6.8391, 6.9824, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.1553, 9.0680, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.7908, 9.7109, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.6665, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.1883, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.3820, 11.4765, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwell-rounded tribute \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.3467, 7.2169,\n 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.0335, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 8.8602, 8.9861, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 10.8631, 10.7772,\n 10.6920, 10.6076, 10.5238, 10.6306, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.9377,\n 11.8579, 11.9558, 11.8766, 11.7980, 11.8956, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.7017, 12.7928, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n, though many of the actors throw off a spark or two when they first appear , they ca n't generate enough heat in this cold vacuum of a comedy to start a reaction . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.8829, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -3.8205, -3.8538, -3.8869, -3.9198,\n -3.9526, -3.9853, -4.0177, -3.8740, -3.9067, -3.9392, -3.9716, -4.0038,\n -4.0359, -4.0678, -4.0996, -4.1312, -4.1627, -4.1940, -4.2252, -4.2563,\n -4.2872, -4.3180, -4.1798, -4.2108, -4.2416, -4.2723, -4.3029, -4.3333,\n -4.3637, -4.3938, -4.4239, -4.2889, -4.3191, -4.3492, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.9488, 7.1317, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 9.8367, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.2259, 11.1197, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 11.9213, 12.0286, 12.1353, 12.0357, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.2615, 12.3655, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.7812, 12.8819, 12.9820, 12.8877, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.6781, 13.5876, 13.6826, 13.7772, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.7926, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.4437, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.0726, 15.1587, 15.2446, 15.1625, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nso much like a young robert deniro \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "12.1%", + "z-score": "-4.22", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.0381, -2.1111, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.9814, -3.0308, -3.0796, -3.1278, -2.8868,\n -2.9361, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -2.8402,\n -2.8868, -2.9329, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -3.1730, -3.2157, -3.2579, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.2332, -3.2746, -3.3156, -3.3564, -3.3968,\n -3.4370, -3.2525, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.5689, -3.6072, -3.6452, -3.6829, -3.7205, -3.7577, -3.7947,\n -3.8315, -3.8680, -3.6961, -3.7330, -3.7697, -3.6004, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.7981, -3.6369, -3.4769, -3.5132, -3.5494, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.8829, -3.9166, -3.9501, -3.9835, -3.8341,\n -3.6856, -3.7196, -3.7534, -3.7870, -3.8205, -3.8538, -3.8869, -3.7417,\n -3.5973, -3.6310, -3.6645, -3.6979, -3.7311, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.9910, -3.8516, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.8739, -3.9056, -3.9372, -3.9687, -4.0000,\n -4.0312, -4.0622, -4.0931, -4.1239, -4.1546, -4.1851, -4.2155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.1287, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.7066, 10.6061, 10.5067, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.4625, 12.3754, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.4263, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 13.9113, 14.0000,\n 13.9221, 13.8447, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nkhouri manages , with terrific flair , to keep the extremes of screwball farce and blood-curdling family intensity on one continuum . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.5010, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.0675, -0.9115, -0.9584, -1.0050, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.2104, 7.1152, 7.2532, 7.3901, 7.5258, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.7387, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.3820, 11.3032, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.3688, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfashioning an engrossing entertainment out \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.06", + "p value": "0.856", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.5667, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.0605])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 1.1055, 1.3862, 1.6590, 1.9245, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.6726, 2.9055, 2.7852, 2.6681, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.4293, 3.6380, 3.8431, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.3068,\n 7.4449, 7.3485, 7.2532, 7.3901, 7.2960, 7.4316, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 7.9724, 7.8859, 8.0111,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.2107, 8.1266, 8.2483, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.5516, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 8.9912,\n 8.9151, 9.0257, 8.9502, 9.0601, 8.9851, 8.9107, 9.0200, 8.9461,\n 8.8728, 8.9815, 8.9086, 9.0167, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.1932, 9.1218, 9.2276, 9.1567, 9.0863, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.7690, 9.8691, 9.9687, 10.0679, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.2273, 10.3248, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nspiffy animated feature \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.0212, -2.0578, -2.0943, -2.1306, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.0034, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.8749, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.3249, 11.4244, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthat 's so sloppily written and cast that you can not believe anyone more central to the creation of bugsy than the caterer \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "8.5%", + "z-score": "-5.36", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.9814, -3.0308, -3.0796, -3.1278, -3.1754,\n -3.2225, -3.2691, -3.3151, -3.3607, -3.4057, -3.1743, -3.2204, -3.2660,\n -3.3111, -3.3558, -3.4000, -3.4438, -3.4871, -3.5301, -3.5726, -3.6148,\n -3.4000, -3.4429, -3.4855, -3.5277, -3.5695, -3.6109, -3.4044, -3.4466,\n -3.4883, -3.5298, -3.5708, -3.6116, -3.6520, -3.6920, -3.7318, -3.7712,\n -3.8104, -3.8492, -3.8877, -3.6950, -3.7342, -3.7730, -3.8115, -3.8497,\n -3.8877, -3.9254, -3.9628, -4.0000, -4.0369, -4.0736, -4.1100, -4.1461,\n -4.1821, -4.2178, -4.2532, -4.2885, -4.3235, -4.3583, -4.1811, -4.2164,\n -4.2514, -4.2862, -4.3208, -4.3552, -4.3894, -4.4234, -4.4571, -4.4907,\n -4.5241, -4.3548, -4.3886, -4.4222, -4.4556, -4.4888, -4.5218, -4.3566,\n -4.3900, -4.4233, -4.4563, -4.4891, -4.5218, -4.5543, -4.5866, -4.6188,\n -4.6508, -4.6826, -4.7143, -4.7458, -4.5879, -4.6198, -4.6514, -4.6829,\n -4.7143, -4.7455, -4.7765, -4.8074, -4.8381, -4.8687, -4.8992, -4.9295,\n -4.9597, -4.9897, -5.0196, -5.0494, -5.0790, -5.1085, -5.1378, -4.9889,\n -5.0185, -5.0480, -5.0774, -5.1066, -5.1357, -5.1647, -5.1936, -5.2223,\n -5.2510, -5.2795, -5.1352, -5.1640, -5.1926, -5.2211, -5.2495, -5.2778,\n -5.1362, -5.1647, -5.1931, -5.2213, -5.2495, -5.2776, -5.3055, -5.3333,\n -5.3611, -5.3887, -5.4162, -5.4436, -5.4709, -5.3340, -5.3615])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.3901, 7.2960, 7.2029, 7.3386, 7.4730,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 7.8065, 7.7192, 7.8463, 7.7598, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.2107, 8.1266, 8.0434, 8.1650,\n 8.2858, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.7482, 8.6677, 8.5879, 8.5088, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 8.9221, 9.0340, 8.9567, 8.8800, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.3774, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.6850, 9.6130, 9.7167, 9.6452, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.7415, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.0753, 10.1750, 10.1058, 10.0371, 9.9687, 10.0679, 10.1667,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.4893, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nalternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.4506, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.7467, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.5348, 1.6865,\n 1.8370, 1.9863, 1.9261, 1.8665, 1.8074, 1.9545, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 1.8953, 1.8385, 1.9803,\n 1.9237, 2.0642, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 1.8446, 1.9799, 2.1143, 2.2478,\n 2.1938, 2.1401, 2.0868, 2.0339, 1.9813, 2.1128, 2.0604, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.9868, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.2132, 2.1637, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 2.0688, 2.0212, 1.9738, 1.9267, 2.0470, 2.1667,\n 2.2857, 2.4042, 2.3567, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.7574, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.9123, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.9024, 11.8172, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.7226, 12.8165, 12.9099, 13.0030, 12.9244, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.2288, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nstrung-together moments \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.3698, -0.2304, -0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.5927, -0.6333, -0.5053, -0.3780, -0.2513, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321, 2.1004, 1.9052,\n 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856, 1.7321, 2.0381, 1.8889,\n 2.1822, 2.4659, 2.7406, 2.5924, 2.8577, 2.7136, 2.9704, 3.2205, 3.0792,\n 3.3221, 3.5590, 3.4207, 3.6515, 3.5165, 3.7417, 3.6098, 3.4816, 3.3566,\n 3.2348, 3.4528, 3.3333, 3.5466, 3.7559, 3.6380, 3.5228, 3.7273, 3.9284,\n 3.8146, 4.0119, 3.9001, 4.0937, 3.9837, 4.1740, 4.0657, 4.2528, 4.1461,\n 4.0415, 3.9386, 3.8376, 4.0205, 3.9208, 4.1008, 4.0024, 3.9056, 4.0825,\n 3.9869, 4.1612, 4.0667, 3.9736, 3.8819, 3.7916, 3.9624, 4.1312, 4.2981,\n 4.4630, 4.3727, 4.5356, 4.6967, 4.8561, 5.0138, 4.9237, 4.8347, 4.7469,\n 4.9023, 4.8154, 4.9691, 5.1212, 5.2719, 5.4212, 5.3345, 5.2489, 5.1643,\n 5.0807, 5.2278, 5.1450, 5.2906, 5.4349, 5.3526, 5.2713, 5.1908, 5.1111,\n 5.2535, 5.3947, 5.5348, 5.6737, 5.5942, 5.5155, 5.6530, 5.7894, 5.9247,\n 5.8464, 5.7689, 5.9029, 6.0359, 6.1680, 6.0908, 6.2217, 6.1451, 6.2750,\n 6.1990, 6.1237, 6.2524, 6.1777, 6.1036, 6.2312, 6.3580, 6.4838, 6.6088,\n 6.7330, 6.8564, 6.9789, 6.9048, 7.0265, 6.9529, 6.8799, 7.0007, 7.1207,\n 7.0481, 6.9762, 6.9048, 6.8339, 6.7637, 6.6939, 6.8127, 6.7434, 6.8614,\n 6.7925, 6.9097, 7.0262, 6.9577, 6.8897, 7.0054, 6.9378, 7.0527, 7.1670,\n 7.2807, 7.2134, 7.3263, 7.2594, 7.1929, 7.1270, 7.2391, 7.1735, 7.1083,\n 7.0436, 7.1549, 7.0905, 7.2012, 7.1372, 7.2472, 7.1835, 7.2929, 7.4017,\n 7.3383, 7.4465, 7.5542, 7.4911, 7.4283, 7.3660, 7.4729, 7.4109, 7.5173,\n 7.6231, 7.7285, 7.6667, 7.7715, 7.8758, 7.8142, 7.7530, 7.8567, 7.9599,\n 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n, generous and subversive artworks \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.6939, -0.5315, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 6.1107, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.8529, 8.7600, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.7901, 11.7169, 11.6441, 11.7389, 11.6667,\n 11.7611, 11.6893, 11.6179, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit does n't follow the stale , standard , connect-the-dots storyline which has become commonplace in movies that explore the seamy underbelly of the criminal world \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.7080,\n -2.7540, -2.5560, -2.3604, -2.4077])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.6584, 11.7647, 11.8704, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.3985, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.3361, 13.4308, 13.3447, 13.4390, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.1074, 14.1974, 14.2870, 14.3762, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.4850, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.7673, 14.8530, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfunny yet \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "166", + "Fraction of T in Greenlist": "86.5%", + "z-score": "19.7", + "p value": "2.08e-86", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.5940, 13.6950, 13.7953, 13.8952, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.7685, 14.8629, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.9591, 16.0476, 16.1357, 16.2233, 16.3106, 16.3975, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.9105, 16.9947, 17.0785, 17.1620,\n 17.2451, 17.3279, 17.4103, 17.4924, 17.5741, 17.6556, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.2954, 18.3739, 18.4522,\n 18.5302, 18.6079, 18.6853, 18.7625, 18.8393, 18.9159, 18.9921, 19.0681,\n 19.1439, 19.2194, 19.2946, 19.3695, 19.4442, 19.5186, 19.5928, 19.6667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\noverbearing and over-the-top \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.9152, 0.8444, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 8.9178, 9.0370, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.0134, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.6722, 10.7714, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's robert duvall ! \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.4174, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.2182, 0.3482,\n 0.3038, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.2590, 5.1490, 5.0410, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.4566, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.3768, 6.5169, 6.6559, 6.7937,\n 6.9303, 6.8414, 6.7536, 6.8889, 6.8019, 6.7159, 6.8500, 6.7648,\n 6.6804, 6.5970, 6.5144, 6.6471, 6.7788, 6.6968, 6.6157, 6.7462,\n 6.8757, 7.0043, 7.1319, 7.2587, 7.3845, 7.5094, 7.6335, 7.7567,\n 7.8791, 7.7981, 7.7178, 7.6383, 7.7597, 7.8803, 8.0002, 7.9212,\n 8.0402, 7.9619, 8.0801, 8.0024, 7.9253, 8.0427, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.2381, 8.1628, 8.2772, 8.2024, 8.1282, 8.2420,\n 8.3550, 8.2813, 8.2082, 8.3205, 8.4322, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.0167, 8.9444, 9.0518, 9.1587, 9.0869,\n 9.1932, 9.2990, 9.2276, 9.3328, 9.4375, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.6394, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.7367, 9.8373, 9.9374, 10.0371, 9.9687, 9.9008, 10.0000,\n 10.0987, 10.0312, 9.9641, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nrich and sudden wisdom \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.7%", + "z-score": "-3.66", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -3.1604, -3.1977, -3.2348, -3.2717,\n -3.1165, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.2460, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.5890, -3.4478, -3.4816,\n -3.5151, -3.5485, -3.5817, -3.6148, -3.4760, -3.5093, -3.5424, -3.5753,\n -3.6080, -3.6407, -3.6731, -3.7055, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.5648, -3.5970, -3.6291, -3.6610])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.6765, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 2.8518, 3.0237, 3.1937, 3.1129, 3.2806, 3.4466,\n 3.3659, 3.2863, 3.2077, 3.1300, 3.0533, 3.2157, 3.3764, 3.2998,\n 3.4586, 3.6159, 3.5396, 3.4641, 3.3895, 3.3156, 3.2426, 3.3968,\n 3.5496, 3.4768, 3.6279, 3.7778, 3.7051, 3.6332, 3.5620, 3.4915,\n 3.4217, 3.5689, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.8730, 3.8061, 3.9448, 4.0825,\n 4.0158, 3.9497, 3.8841, 3.8191, 3.7547, 3.8903, 4.0249, 3.9606,\n 4.0941, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 3.9107, 4.0415,\n 4.1713, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.1210,\n 4.0608, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.3818,\n 4.3226, 4.2639, 4.2056, 4.3280, 4.4497, 4.3915, 4.5123, 4.6325,\n 4.5744, 4.5166, 4.4593, 4.4023, 4.3456, 4.4644, 4.5826, 4.5260,\n 4.6434, 4.7602, 4.7037, 4.6476, 4.5918, 4.5364, 4.4813, 4.5968,\n 4.7117, 4.6567, 4.7709, 4.8845, 4.8295, 4.7749, 4.7206, 4.6667,\n 4.6130, 4.7255, 4.8374, 4.7838, 4.8950, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nacted and directed , it 's clear that washington most certainly has a new career ahead of him \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 0.9631, 1.1375, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.3517, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.3128, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.7158, 1.6641, 1.6127, 1.5617, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868, 3.2206, 3.5382,\n 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998, 3.5796, 3.3968, 3.2222,\n 3.4915, 3.7524, 4.0056, 3.8367, 3.6742, 3.5176, 3.3665, 3.6108, 3.8490,\n 4.0814, 3.9337, 3.7905, 4.0166, 4.2378, 4.4544, 4.3142, 4.1779, 4.0451,\n 3.9158, 4.1265, 4.3333, 4.5363, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281,\n 4.9075, 5.0844, 5.2590, 5.1490, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.2350, 5.1333, 5.2981, 5.4610, 5.6220, 5.5213, 5.4222, 5.3245,\n 5.2281, 5.3867, 5.5435, 5.6986, 5.6032, 5.5090, 5.6622, 5.8139, 5.9641,\n 5.8707, 5.7785, 5.6875, 5.5976, 5.7458, 5.8926, 6.0380, 5.9488, 5.8606,\n 6.0044, 6.1470, 6.2883, 6.2008, 6.1143, 6.0288, 5.9442, 6.0838, 6.2222,\n 6.3595, 6.2755, 6.1924, 6.3283, 6.4632, 6.5970, 6.5144, 6.4327, 6.3517,\n 6.2716, 6.4040, 6.5354, 6.6658, 6.5861, 6.5072, 6.6365, 6.7648, 6.8922,\n 6.8138, 6.7361, 6.6591, 6.5828, 6.7090, 6.8343, 6.9587, 6.8828, 6.8076,\n 6.7330, 6.6591, 6.5857, 6.7089, 6.8313, 6.9529, 6.8799, 7.0007, 7.1207,\n 7.2399, 7.3584, 7.4762, 7.4034, 7.5204, 7.4482, 7.5644, 7.6800, 7.6082,\n 7.7230, 7.6517, 7.7658, 7.6950, 7.6246, 7.5548, 7.4855, 7.5988, 7.7114,\n 7.8233, 7.7544, 7.8657, 7.7971, 7.9078, 8.0178, 8.1273, 8.2362, 8.1679,\n 8.2762, 8.2084, 8.3161, 8.4232, 8.5298, 8.6359, 8.5683, 8.6738, 8.6066,\n 8.7116, 8.8160, 8.9199, 9.0233, 8.9565, 9.0593, 8.9929, 9.0952, 9.1971,\n 9.2986, 9.3995, 9.3333, 9.4338, 9.3680, 9.4680, 9.5675, 9.6666, 9.7653,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.49, + "accuracy_with_watermark": 0.56, + "f1_without_watermark": 0.5714285714285714, + "f1_with_watermark": 0.56 + } + }, + "validation": { + "results": [ + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's a charming and often affecting journey . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.1264, -2.1909,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.1229, -2.1828, -2.2418, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -2.0207,\n -2.0767, -2.1320, -1.9044, -1.9604, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.5479, -1.3819, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.6737, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641, 3.2206, 2.9938,\n 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712, 3.5796, 3.8497, 4.1111,\n 4.3644, 4.1812, 4.0056, 3.8367, 4.0825, 3.9196, 3.7626, 4.0012, 4.2339,\n 4.0814, 4.3083, 4.1603, 4.3818, 4.2378, 4.4544, 4.6664, 4.8742, 4.7336,\n 4.9373, 4.8003, 5.0000, 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.5569,\n 4.4374, 4.3205, 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281,\n 4.6188, 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.8305, 5.0019, 5.1711,\n 5.0680, 4.9666, 4.8667, 5.0332, 4.9346, 4.8375, 5.0017, 5.1640, 5.0679,\n 5.2281, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622, 5.5691, 5.7207,\n 5.6286, 5.7785, 5.6875, 5.5976, 5.5088, 5.6569, 5.5690, 5.4822, 5.3964,\n 5.3116, 5.2278, 5.3736, 5.5181, 5.6614, 5.8034, 5.7199, 5.6373, 5.5556,\n 5.4747, 5.3947, 5.3156, 5.4554, 5.5942, 5.7318, 5.8684, 6.0038, 5.9247,\n 5.8464, 5.7689, 5.9029, 5.8260, 5.7498, 5.8825, 6.0143, 5.9386, 6.0693,\n 5.9941, 6.1237, 6.0491, 6.1777, 6.3054, 6.4322, 6.3580, 6.4838, 6.4101,\n 6.5350, 6.4618, 6.3892, 6.3172, 6.4409, 6.3694, 6.2985, 6.2282, 6.1584,\n 6.0892, 6.2116, 6.3333, 6.4543, 6.5745, 6.5054, 6.4368, 6.3688, 6.3013,\n 6.2342, 6.1677, 6.2866, 6.4048, 6.5223, 6.6391, 6.7552, 6.6887, 6.6227,\n 6.5571, 6.6724, 6.6072, 6.5424, 6.6568, 6.7706, 6.7061, 6.8192, 6.7551,\n 6.8675, 6.8037, 6.9155, 7.0266, 7.1372, 7.0736, 7.1835, 7.1203, 7.2296,\n 7.1667, 7.1041, 7.0420, 7.1506, 7.0888, 7.0273, 6.9663, 6.9056, 6.8454,\n 6.9530, 7.0601, 7.1667, 7.2728, 7.2125, 7.1527, 7.0932, 7.0340, 6.9752,\n 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nunflinchingly bleak and desperate \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.1779, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.9962, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.8304, 8.9496, 8.8631, 8.7773,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.7701, 9.8776, 9.9846, 9.9067, 9.8293, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.3893, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.9220, 10.8505, 10.7795, 10.8770, 10.9740, 10.9034, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nallows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -1.9749, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.1685, -2.2083, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.3063, -2.3443, -2.2024, -2.2406, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 9.9495, 9.8271, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 10.8477, 10.9669, 11.0851, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 11.8915, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.8582, 12.7550, 12.6529, 12.7567, 12.8598,\n 12.9624, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.5647, 13.4674, 13.5654, 13.6629, 13.7599, 13.6640,\n 13.7606, 13.8567, 13.9524, 14.0475, 14.1422, 14.2364, 14.3302, 14.4234,\n 14.3295, 14.2364, 14.3295, 14.4222, 14.5144, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.7877, 14.8779, 14.9677, 15.0571, 15.1461, 15.0560, 14.9666,\n 15.0555, 15.1440, 15.2321, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.6653, 15.7509, 15.8362, 15.7495, 15.6634, 15.7485, 15.8334,\n 15.9179, 15.8327, 15.9170, 16.0009, 16.0845, 16.1678, 16.2507, 16.3333,\n 16.4156, 16.4976, 16.4139, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe acting , costumes , music , cinematography and sound are all astounding given the production 's austere locales . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.9127, 2.0913, 2.0158, 1.9413, 1.8677, 2.0426, 1.9695,\n 2.1420, 2.0692, 1.9973, 2.1669, 2.0954, 2.0247, 2.1917, 2.1213,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.0107, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.2222, 2.1567, 2.0918, 2.2468, 2.1822,\n 2.1182, 2.0548, 1.9920, 1.9298, 2.0817, 2.0197, 1.9582, 2.1082,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.7619, 1.8983, 1.8446, 1.9799, 2.1143, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.0339, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.2159, 2.1640, 2.1125, 2.2406, 2.1892, 2.1381,\n 2.0873, 2.2140, 2.1634, 2.1131, 2.2387, 2.1884, 2.1385, 2.2630,\n 2.2132, 2.1637, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.1896, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.0792, 2.9424, 2.8098, 2.6811, 2.9212,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 3.7700, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.7242, 5.8835, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.0370, 8.1651, 8.0741,\n 7.9839, 7.8948, 8.0219, 7.9336, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.1650,\n 8.0824, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.1989, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.7033, 8.6241, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.4638, 10.5654, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 10.9220, 10.8505, 10.7795, 10.8770, 10.8064, 10.7363, 10.8333,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's slow -- very , very slow . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.9623, 0.8926, 1.0773, 1.0079, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 1.0719, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.4629, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.6127, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.6530, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.5363, 1.6632, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.6378, 1.5916, 1.7128, 1.8333,\n 1.7870, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "115", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "69.6%", + "z-score": "11", + "p value": "1.27e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 8.9544, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.1333, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.9585, 10.8423,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 11.2069, 11.0963,\n 10.9870, 11.1033, 10.9955, 10.8889, 11.0047, 11.1197, 11.0147, 11.1291,\n 11.0254, 10.9229, 11.0368])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nalthough laced with humor and a few fanciful touches , the film is a refreshingly serious look at young women . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "0.5%", + "z-score": "-7.98", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -3.3362, -3.3853, -3.4338, -3.4816, -3.5287, -3.5753, -3.6213, -3.6667,\n -3.7115, -3.7559, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.3301,\n -4.3683, -4.4061, -4.4437, -4.4809, -4.5178, -4.5544, -4.5908, -4.6268,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.8636, -5.8919,\n -5.9201, -5.9481, -5.9760, -6.0038, -6.0315, -6.0590, -6.0864, -6.1137,\n -6.1409, -6.1680, -6.1949, -6.2217, -6.2484, -6.2750, -6.3015, -6.3278,\n -6.3541, -6.3803, -6.4063, -6.4322, -6.4581, -6.4838, -6.5094, -6.5350,\n -6.5604, -6.5857, -6.6110, -6.6361, -6.6612, -6.6861, -6.7110, -6.7358,\n -6.7604, -6.7850, -6.8095, -6.8339, -6.8583, -6.8825, -6.9067, -6.9307,\n -6.9547, -6.9786, -7.0025, -7.0262, -7.0499, -7.0735, -7.0970, -7.1204,\n -7.1437, -7.1670, -7.1902, -7.2134, -7.2364, -7.2594, -7.2823, -7.3051,\n -7.3279, -7.3506, -7.3732, -7.3958, -7.4183, -7.4407, -7.4631, -7.4853,\n -7.5076, -7.5297, -7.5518, -7.5738, -7.5958, -7.6177, -7.6395, -7.6613,\n -7.6830, -7.7047, -7.7263, -7.7478, -7.7693, -7.7907, -7.8120, -7.8333,\n -7.8546, -7.8758, -7.8969, -7.9179, -7.9390, -7.9599, -7.9808])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 6.9378, 7.0812, 7.2232, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.6867, 7.8168, 7.7268, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 8.0598, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 8.8800, 8.9912,\n 8.9151, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.3113,\n 9.4188, 9.3443, 9.2704, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.6532, 9.7574, 9.6850, 9.6130, 9.7167, 9.8198, 9.7483,\n 9.6774, 9.7800, 9.7095, 9.8116, 9.9132, 10.0143, 9.9442, 9.8746,\n 9.8054, 9.7367, 9.8373, 9.7690, 9.8691, 9.9687, 10.0679, 10.0000,\n 9.9325, 10.0312, 9.9641, 10.0624, 9.9957, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na sometimes tedious film . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.8185, 1.7321,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.7150, 1.8773, 2.0381,\n 2.1974, 2.3552, 2.2884, 2.2222, 2.1567, 2.0918, 2.2468, 2.1822,\n 2.3354, 2.2711, 2.2074, 2.1442, 2.0817, 2.2323, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 2.0682, 2.0101, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.9518, 2.0907, 2.0349, 2.1726, 2.1170,\n 2.0617, 2.1980, 2.1429, 2.2780, 2.4122, 2.3570, 2.3022, 2.4351,\n 2.3805, 2.3262, 2.2723, 2.2188, 2.1656, 2.2966, 2.2436, 2.3735,\n 2.3206, 2.2680, 2.2159, 2.3443, 2.2923, 2.4198, 2.5466, 2.6726,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.5604, 2.5099, 2.4597, 2.5820, 2.7036, 2.8245, 2.7741, 2.8943,\n 2.8440, 2.7940, 2.9132, 2.8633, 2.8137, 2.7644, 2.7154, 2.6667,\n 2.6182, 2.5700, 2.5220, 2.4744, 2.4269, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 7.6800, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 7.6594, 7.4878, 7.3208, 7.1583, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.7586, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.0000, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 12.8359, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.7986, 13.7122, 13.6264, 13.7194,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.4591, 14.5479, 14.4651, 14.3828, 14.4714,\n 14.5595, 14.4780, 14.3970, 14.3166, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nor doing last year 's taxes with your ex-wife . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.3462, -2.3982, -2.1773,\n -2.2299, -2.2819, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.6485, -2.6941, -2.5011, -2.5471, -2.3570,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.4885, -2.5322, -2.5756, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.4019, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.3238, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.2943, -2.3354, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.1576, -2.1980, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.4393, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.8765, 4.0825, 3.9614, 3.8431, 4.0446, 4.2426, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.1490, 5.3211, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 8.8227, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.7622, 9.6758, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.3537, 9.2782, 9.2032, 9.1287,\n 9.0548, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.2276, 9.1567, 9.2619, 9.3665, 9.4707, 9.4002,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.7415, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.2050, 10.1363, 10.2350, 10.1667,\n 10.0987, 10.1970, 10.1295, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nyou do n't have to know about music to appreciate the film 's easygoing blend of comedy and romance . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "9.6%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.6833, -2.7351, -2.7863, -2.8368, -2.8868,\n -2.9361, -2.9848, -3.0330, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.3128, -3.3566,\n -3.4000, -3.1879, -3.2320])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.1333, 9.0068, 9.1455, 9.2828, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 9.8367, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.0535, 9.9469, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.1991, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 12.8556, 12.7690, 12.8661, 12.9628,\n 13.0590, 12.9732, 13.0690, 13.1644, 13.2593, 13.3537, 13.4477, 13.3631,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 13.8522, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.4046, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.3422, 14.2640, 14.1863, 14.2737, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nin exactly 89 minutes , most of which passed as slowly as if i 'd been sitting naked on an igloo , formula 51 sank from quirky to jerky to utter turkey . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.0889, 1.9795, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.4930, 2.4004, 2.3094,\n 2.5064, 2.7005, 2.6098, 2.5205, 2.4327, 2.6222, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.9329, 2.8518, 2.7717, 2.9433, 2.8638, 2.7854, 2.9542,\n 3.1211, 3.0429, 2.9656, 2.8893, 2.8138, 2.7393, 2.9025, 2.8284,\n 2.9897, 2.9161, 2.8433, 2.7713, 2.9299, 2.8583, 2.7875, 2.9439,\n 3.0989, 3.0282, 2.9582, 2.8889, 3.0415, 3.1928, 3.3428, 3.2733,\n 3.4217, 3.3526, 3.2841, 3.4308, 3.3627, 3.2953, 3.2285, 3.1623,\n 3.3066, 3.2408, 3.1755, 3.3182, 3.2533, 3.1889, 3.3301, 3.4701,\n 3.4058, 3.3420, 3.4806, 3.4171, 3.3542, 3.4913, 3.4286, 3.5645,\n 3.5022, 3.4403, 3.3789, 3.5132, 3.4521, 3.3915, 3.5245, 3.6566,\n 3.5960, 3.5359, 3.4762, 3.6068, 3.7366, 3.8655, 3.8057, 3.9337,\n 3.8741, 3.8150, 3.7563, 3.8829, 3.8244, 3.7664, 3.7087, 3.8341,\n 3.7766, 3.7196, 3.8438, 3.7870, 3.7306, 3.8538, 3.9762, 3.9198,\n 3.8638, 3.8081, 3.7528, 3.6979, 3.8189, 3.7641, 3.8843, 3.8297,\n 3.7755, 3.7216, 3.8406, 3.7869, 3.7335, 3.8516, 3.9691, 3.9158,\n 3.8627, 3.8100, 3.9265, 4.0423, 4.1576, 4.1048, 4.2193, 4.1667,\n 4.1143, 4.0622, 4.1758, 4.1239, 4.0723, 4.0210, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.0401, 6.9294, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.0702, 8.9763, 8.8833, 8.7913, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.1252, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 10.0647, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.2766, 10.1955, 10.1151,\n 10.0353, 10.1423, 10.0631, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 11.9176,\n 12.0127, 11.9380, 11.8638, 11.7901, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 11.9487, 12.0419, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe mesmerizing performances of the leads keep the film grounded and keep the audience riveted . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.8245, -2.8595, -2.8943,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 11.8870, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.4430, 12.5401, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.7226, 12.8165, 12.9099, 13.0030, 12.9244, 12.8464, 12.7688,\n 12.8616, 12.9540, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit takes a strange kind of laziness to waste the talents of robert forster , anne meara , eugene levy , and reginald veljohnson all in the same movie . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.5304, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -2.9659, -3.0039, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -3.0706, -3.1071, -3.1433, -2.9950, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.3716, -3.4050,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "85", + "Fraction of T in Greenlist": "42.7%", + "z-score": "5.77", + "p value": "3.95e-09", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.3206, 1.5181, 1.7130, 1.9052,\n 1.8245, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.8677, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.6498, 1.8116,\n 1.9720, 2.1309, 2.2884, 2.4444, 2.5991, 2.5322, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.4227, 2.3586, 2.5087, 2.4449, 2.3817, 2.3190,\n 2.2569, 2.4045, 2.3426, 2.4887, 2.4271, 2.3660, 2.3054, 2.2454,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.2943, 2.2361, 2.1783,\n 2.1210, 2.0642, 2.2037, 2.3422, 2.4797, 2.6163, 2.7520, 2.8868,\n 3.0206, 3.1536, 3.2857, 3.4170, 3.5474, 3.6770, 3.6178, 3.7463,\n 3.6874, 3.6289, 3.7563, 3.8829, 4.0087, 4.1338, 4.0750, 4.0166,\n 3.9586, 4.0825, 4.2056, 4.3280, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.3967, 4.5166, 4.4593, 4.5783, 4.6968, 4.8146, 4.9317, 4.8742,\n 4.9906, 5.1064, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.0513, 5.1647, 5.2775, 5.3898, 5.5015, 5.6126, 5.5562, 5.6667,\n 5.6104, 5.7203, 5.6643, 5.6085, 5.7177, 5.8263, 5.7707])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n... the film suffers from a lack of humor ( something needed to balance out the violence ) ... \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "25.2%", + "z-score": "0.0458", + "p value": "0.482", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.3982, -0.2265,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.0461, -0.0919, 0.0458])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.1711, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.2923, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.3695, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.0562, 10.9697,\n 10.8838, 10.9898, 10.9048, 10.8204, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.7822, 10.8867, 10.8051, 10.9091, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.2816, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.5234, 11.4459, 11.3688, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.6082, 11.5329, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.9380, 12.0327, 11.9586, 12.0529, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.0419, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwe root for ( clara and paul ) , even like them , though perhaps it 's an emotion closer to pity . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 1.1877, 1.3373, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.3284, 1.2804, 1.4087, 1.3607, 1.3131, 1.2657, 1.3926,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 8.8529, 8.7600, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.1252, 9.0370, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.4124, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.3537, 9.4619, 9.5695, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.6635, 9.7688, 9.6948, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.6850, 9.6130, 9.5416, 9.6452, 9.5743,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.7690, 9.7011, 9.8012, 9.9008, 9.8333,\n 9.7663, 9.6996, 9.7987, 9.8974, 9.9957, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\neven horror fans will most likely not find what they 're seeking with trouble every day ; the movie lacks both thrills and humor . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.2222, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.5894, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.7041, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -1.9753, -1.8383, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.5878, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.0712, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.0302, 12.1260, 12.2214, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na gorgeous , high-spirited musical from india that exquisitely blends music , dance , song , and high drama . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "202", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "4.0%", + "z-score": "-6.91", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -3.3362, -3.3853, -3.4338, -3.4816, -3.5287, -3.5753, -3.6213, -3.6667,\n -3.7115, -3.7559, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.0415,\n -4.0819, -4.1219, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.6796, -4.7140,\n -4.5138, -4.5491, -4.5840, -4.6188, -4.6533, -4.6876, -4.7217, -4.7556,\n -4.7892, -4.8226, -4.8559, -4.8889, -4.9217, -4.9543, -4.9868, -5.0190,\n -5.0511, -5.0829, -5.1146, -5.1461, -5.1775, -5.2086, -5.2396, -5.2705,\n -5.3011, -5.3316, -5.3620, -5.3921, -5.4222, -5.4521, -5.4818, -5.5114,\n -5.5408, -5.3675, -5.3974, -5.4272, -5.4568, -5.4863, -5.5156, -5.5448,\n -5.5739, -5.6028, -5.6316, -5.6602, -5.6887, -5.7171, -5.7454, -5.7735,\n -5.8015, -5.8294, -5.8571, -5.8848, -5.9123, -5.9397, -5.9670, -5.9941,\n -6.0212, -6.0481, -6.0750, -6.1017, -6.1283, -6.1548, -6.1812, -6.0249,\n -6.0517, -6.0784, -6.1049, -6.1314, -6.1577, -6.1839, -6.2101, -6.2361,\n -6.2620, -6.2879, -6.3136, -6.3392, -6.3648, -6.3902, -6.4156, -6.4409,\n -6.4660, -6.4911, -6.5161, -6.5410, -6.5659, -6.5906, -6.6152, -6.6398,\n -6.6643, -6.6887, -6.7130, -6.7372, -6.7614, -6.7854, -6.8094, -6.8333,\n -6.6909, -6.7151, -6.7392, -6.7632, -6.7872, -6.8111, -6.8349, -6.8586,\n -6.8822, -6.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.7555, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.1266, 8.2483, 8.1650,\n 8.0824, 8.0006, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.1192,\n 8.0402, 7.9619, 8.0801, 8.1976, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.0257, 8.9502, 9.0601, 8.9851, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.5577, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.0547, 9.9837, 10.0848, 10.0143, 10.1149, 10.2151,\n 10.1450, 10.2447, 10.1750, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.4312, 10.3628, 10.4603, 10.3923, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe emotions are raw and will strike a nerve with anyone who 's ever had family trauma . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.1576, -2.0068, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -1.9311, -1.7974, -1.8352, -1.7025,\n -1.7404, -1.6087, -1.4777, -1.5159, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.1983, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 10.8328, 10.7084, 10.8321,\n 10.9546, 11.0762, 11.1967, 11.3163, 11.4349, 11.5525, 11.4323, 11.3137,\n 11.4311, 11.5476, 11.6632, 11.7779, 11.8918, 12.0049, 12.1171, 12.0021,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.5462, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.0677, 12.9624, 13.0656, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.6720, 13.5693, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.0530, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.4301, 14.5238, 14.6170, 14.5206, 14.4250, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.9769, 15.0674, 14.9737, 14.8807, 14.9711,\n 15.0610, 15.1505, 15.2397, 15.3284, 15.4167, 15.5046, 15.4135, 15.3230,\n 15.4108, 15.4983, 15.5853, 15.6720, 15.7584, 15.8443, 15.9299, 15.8411,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.3441,\n 16.2574, 16.1713, 16.2547, 16.3377, 16.4205, 16.5028, 16.5849, 16.6667,\n 16.7481, 16.6634, 16.5793, 16.6607, 16.7417, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\naudrey tatou has a knack for picking roles that magnify her outrageous charm , and in this literate french comedy , she 's as morning-glory exuberant as she was in am\u00e9lie . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "18", + "Fraction of T in Greenlist": "9.0%", + "z-score": "-5.2", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -3.3362, -3.3853, -3.4338, -3.4816, -3.5287, -3.5753, -3.6213, -3.6667,\n -3.7115, -3.7559, -3.4763, -3.5228, -3.5687, -3.6141, -3.6589, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.4641,\n -3.5090, -3.5533, -3.5973, -3.6407, -3.6838, -3.7264, -3.7685, -3.8103,\n -3.8517, -3.8927, -3.9333, -3.9736, -3.7503, -3.7916, -3.8325, -3.8730,\n -3.9132, -3.9530, -3.9925, -3.7796, -3.8200, -3.6109, -3.6520, -3.6927,\n -3.4883, -3.5298, -3.5708, -3.6116, -3.6520, -3.6920, -3.7318, -3.7712,\n -3.8104, -3.8492, -3.8877, -3.9260, -3.7342, -3.7730, -3.8115, -3.8497,\n -3.8877, -3.9254, -3.9628, -4.0000, -4.0369, -4.0736, -4.1100, -4.1461,\n -4.1821, -4.2178, -4.0379, -4.0740, -4.1100, -4.1457, -3.9694, -4.0056,\n -4.0415, -4.0771, -4.1126, -3.9404, -3.9763, -4.0119, -4.0473, -4.0825,\n -3.9141, -3.9497, -3.7832, -3.8191, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -3.8490,\n -3.8837, -3.9181, -3.9524, -3.9865, -4.0204, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.0345, -4.0678, -4.1009, -4.1338, -4.1666, -4.0166,\n -4.0496, -4.0825, -4.1152, -4.1477, -4.1800, -4.2122, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.3710, -4.4023, -4.4334, -4.4644, -4.4953, -4.5260,\n -4.5566, -4.5871, -4.6174, -4.6476, -4.6776, -4.7076, -4.7374, -4.7670,\n -4.7966, -4.8260, -4.8553, -4.8845, -4.9135, -4.9425, -4.9713, -5.0000,\n -5.0286, -5.0571, -5.0854, -5.1137, -5.1418, -5.1698, -5.1978])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.5304, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.2124, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.3377, 12.4434, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.6592, 12.5604, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.3770, 13.4745, 13.5714, 13.4780, 13.5746, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.7730, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.0619, 14.1543, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.7049, 14.7939, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 14.9755, 14.8912, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.6709, 15.7545, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n... the movie is just a plain old monster . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.6484, 9.7622, 9.8753, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.3333, 10.2509, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.8170, 10.9176, 10.8421, 10.9422, 11.0418, 10.9669,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.1148, 11.0414, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.1164, 11.2129, 11.3091, 11.4047, 11.3333,\n 11.2624, 11.3577, 11.2872, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nin its best moments , resembles a bad high school production of grease , without benefit of song . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.7559, 0.6888, 0.8716, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.6667, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.7625, 0.9115, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.7493,\n 0.7001, 0.8374, 0.9739, 0.9245, 1.0598, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.8340, 0.7878, 0.9165, 1.0445,\n 0.9981, 1.1251, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "185", + "Fraction of T in Greenlist": "93.0%", + "z-score": "22.1", + "p value": "6.28e-109", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 4.9193, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.8771,\n 11.9961, 12.1140, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.9130, 13.0236, 13.1334, 13.2424, 13.3507, 13.4581, 13.5648, 13.6707,\n 13.7759, 13.8804, 13.9842, 14.0873, 14.1898, 14.2915, 14.3927, 14.4931,\n 14.5930, 14.6922, 14.7909, 14.8889, 14.9863, 15.0832, 15.1795, 15.2753,\n 15.3705, 15.4651, 15.5592, 15.6528, 15.7459, 15.8385, 15.9306, 16.0222,\n 16.1133, 16.2040, 16.2941, 16.3838, 16.4731, 16.5619, 16.6503, 16.7382,\n 16.8257, 16.9127, 16.9994, 17.0856, 17.1715, 17.2569, 17.3419, 17.4266,\n 17.5109, 17.5947, 17.6782, 17.7614, 17.8442, 17.9266, 18.0086, 18.0903,\n 18.1717, 18.2527, 18.3333, 18.4137, 18.4937, 18.5733, 18.6527, 18.7317,\n 18.8104, 18.8888, 18.9669, 19.0447, 19.1222, 19.1994, 19.2763, 19.3529,\n 19.4292, 19.5052, 19.5809, 19.6564, 19.7316, 19.8065, 19.8811, 19.9555,\n 20.0296, 20.1035, 20.1770, 20.2504, 20.3234, 20.3963, 20.4688, 20.5412,\n 20.6132, 20.6851, 20.7567, 20.8280, 20.8992, 20.9701, 21.0407, 21.1112,\n 21.1814, 21.2514, 21.3211, 21.3907, 21.4600, 21.5291, 21.5980, 21.6667,\n 21.7351, 21.8034, 21.8715, 21.9393, 22.0070, 22.0744, 22.1417])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\npumpkin takes an admirable look at the hypocrisy of political correctness , but it does so with such an uneven tone that you never know when humor ends and tragedy begins . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.7870, -3.8205, -3.8538, -3.8869, -3.9198,\n -3.9526, -3.9853, -4.0177, -4.0501, -4.0822, -4.1143, -4.1461, -4.1779,\n -4.2094, -4.0678, -3.9269, -3.9590, -3.9910, -4.0228, -4.0545, -4.0860,\n -3.9476, -3.9793, -4.0109, -4.0423, -4.0736, -4.1048, -4.1358, -4.1667,\n -4.1974, -4.2280, -4.2585, -4.2889, -4.3191, -4.3492, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415, 4.3409, 4.0825,\n 3.8411, 3.6148, 3.4017, 3.2004, 3.0096, 3.2998, 3.5796, 3.3968, 3.6667,\n 3.9279, 4.1812, 4.0056, 4.2515, 4.4907, 4.3217, 4.5547, 4.7819, 5.0037,\n 5.2204, 5.0576, 4.8999, 5.1121, 4.9592, 4.8107, 4.6664, 4.5260, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.4295, 5.6099, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919,\n 5.7735, 5.6573, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 6.1968, 6.3509,\n 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.4008, 6.3035, 6.2075,\n 6.1128, 6.0193, 6.1664, 6.3122, 6.2197, 6.3640, 6.5069, 6.6486, 6.5569,\n 6.6973, 6.8364, 6.7456, 6.8834, 7.0201, 7.1556, 7.2900, 7.2001, 7.1111,\n 7.2443, 7.1563, 7.0692, 6.9830, 6.8977, 6.8133, 6.7298, 6.8615, 6.9923,\n 6.9094, 7.0391, 7.1678, 7.2956, 7.2134, 7.3402, 7.4661, 7.3845, 7.5094,\n 7.6335, 7.7567, 7.8791, 7.7981, 7.7178, 7.8393, 7.7597, 7.6808, 7.6026,\n 7.5251, 7.4483, 7.3721, 7.4924, 7.6120, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 8.0476, 7.9729, 8.0880, 8.2024, 8.3162, 8.4293, 8.3550,\n 8.2813, 8.3937, 8.3205, 8.2479, 8.1758, 8.1043, 8.0333, 7.9628, 8.0742,\n 8.1851, 8.1150, 8.2252, 8.3349, 8.4439, 8.3742, 8.4826, 8.5905, 8.5212,\n 8.6284, 8.7351, 8.8413, 8.9469, 8.8780, 8.8094, 8.9145, 8.8464, 8.7788,\n 8.7116, 8.6448, 8.5785, 8.5126, 8.6169, 8.7207, 8.6551, 8.7584, 8.8612,\n 8.9635, 8.8982, 9.0000, 9.1013, 9.0364, 9.1372, 9.2376, 9.3375, 9.4370,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe iditarod lasts for days - this just felt like it did . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 8.8667, 8.7419, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 9.8064, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.4592, 10.5763, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.4758, 13.5683, 13.4859, 13.5781,\n 13.6698, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.4046, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nholden caulfield did it better . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.1263, -0.9759, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.6128, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "70.1%", + "z-score": "13.4", + "p value": "1.59e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.2593, 13.3537, 13.4477])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na delectable and intriguing thriller filled with surprises , read my lips is an original . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.9759, -1.0211, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -0.9742, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.1399, -1.1790, -1.2179, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 6.7337, 6.5433, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.0267, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.0910, 10.1968, 10.1189, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.2029, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.0074, 9.9340, 9.8611, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.3999, 10.3280, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 10.9299, 10.8602, 10.7910, 10.7222, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nseldom has a movie so closely matched the spirit of a man and his work . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.9169, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.9972, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 1.1593, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 1.1380, 1.0911, 1.2185,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.4659, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.4524, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.2001, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.5655, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.8028, 11.9020, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.6918, 12.7847, 12.8771, 12.9691, 12.8928, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.1063, 13.1966, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nnicks , seemingly uncertain what 's going to make people laugh , runs the gamut from stale parody to raunchy sex gags to formula romantic comedy . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.6025, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.4536, 0.5879, 0.5410, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.7029, 0.8248, 0.7816, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 2.1831, 2.4351, 2.3113, 2.5560, 2.4345, 2.6726, 2.9055, 2.7852, 2.6681,\n 2.5538, 2.7791, 2.6667, 2.5568, 2.7761, 2.6679, 2.8823, 2.7757, 2.9856,\n 3.1918, 3.3947, 3.2883, 3.4873, 3.3824, 3.5777, 3.7700, 3.6662, 3.5642,\n 3.4641, 3.6522, 3.5533, 3.4562, 3.6407, 3.5447, 3.4503, 3.3574, 3.5382,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 3.7916, 3.7025, 3.8730, 4.0415,\n 3.9530, 3.8657, 4.0316, 3.9452, 3.8600, 3.7758, 3.9389, 3.8555, 3.7732,\n 3.9340, 4.0931, 4.2507, 4.1684, 4.0872, 4.0069, 3.9276, 3.8492, 3.7717,\n 3.6950, 3.8490, 4.0016, 3.9253, 3.8497, 4.0004, 3.9254, 3.8512, 4.0000,\n 4.1475, 4.2938, 4.2196, 4.3644, 4.2907, 4.4341, 4.5762, 4.5029, 4.4302,\n 4.3583, 4.4987, 4.4272, 4.3564, 4.4953, 4.4249, 4.3552, 4.2861, 4.4234,\n 4.5596, 4.6949, 4.6258, 4.7599, 4.6912, 4.6232, 4.5557, 4.6883, 4.8200,\n 4.7527, 4.6860, 4.8164, 4.7501, 4.6843, 4.6191, 4.7481, 4.6832, 4.6188,\n 4.7467, 4.8737, 5.0000, 4.9356, 4.8717, 4.8083, 4.7454, 4.6829, 4.6209,\n 4.5594, 4.6838, 4.8074, 4.7460, 4.6850, 4.8076, 4.7469, 4.6867, 4.8083,\n 4.9292, 5.0494, 4.9891, 5.1085, 5.0485, 5.1671, 5.2850, 5.2251, 5.1657,\n 5.1066, 5.2235, 5.1647, 5.1063, 5.2223, 5.1642, 5.1064, 5.0489, 5.1640,\n 5.2784, 5.3923, 5.3349, 5.4480, 5.3909, 5.3340, 5.2775, 5.3898, 5.5015,\n 5.4451, 5.3891, 5.5000, 5.4442, 5.3887, 5.3335, 5.4436, 5.3886, 5.3340,\n 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe action switches between past and present , but the material link is too tenuous to anchor the emotional connections that purport to span a 125-year divide . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.2730, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.1790, -1.2179, -1.0890, -1.1279, -1.1667,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 5.9479, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.9216, 7.8113, 7.9530, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.6747, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 10.0698, 9.9783, 9.8877, 10.0021,\n 9.9124, 9.8236, 9.7356, 9.6484, 9.5620, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.9431, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.3333,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.4765, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's an offbeat treat that pokes fun at the democratic exercise while also examining its significance for those who take part . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.0685, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.0429, -2.8446, -2.8893, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.5852, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.1229, -3.1604, -3.1977, -3.2348, -3.2717,\n -3.3083, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.4780, -3.5131, -3.5480, -3.5827, -3.6172, -3.4689,\n -3.5036, -3.5382, -3.5725, -3.6067, -3.6407, -3.6745, -3.7082, -3.7417,\n -3.7750, -3.6310, -3.4879, -3.5218, -3.5555, -3.5890, -3.4478, -3.4816,\n -3.5151, -3.3754, -3.4091, -3.4427, -3.3044, -3.3381, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868, 2.6605, 2.9938,\n 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712, 4.0415, 4.3027, 4.5556,\n 4.8008, 4.6101, 4.4272, 4.6663, 4.4907, 4.7237, 4.9507, 4.7819, 4.6188,\n 4.4610, 4.6829, 4.8999, 5.1121, 5.3199, 5.1671, 5.0186, 4.8742, 5.0779,\n 4.9373, 4.8003, 4.6667, 4.5363, 4.7357, 4.6082, 4.8038, 4.9962, 4.8712,\n 4.7488, 4.9377, 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100,\n 5.4848, 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 5.8835, 5.7812, 5.6804, 5.8377,\n 5.9932, 6.1471, 6.2994, 6.1996, 6.1012, 6.2517, 6.1546, 6.0587, 5.9641,\n 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.3640, 6.5069, 6.4153, 6.5569,\n 6.6973, 6.6066, 6.5169, 6.4283, 6.3408, 6.4795, 6.6171, 6.7536, 6.8889,\n 6.8019, 6.7159, 6.8500, 6.7648, 6.6804, 6.5970, 6.5144, 6.6471, 6.5653,\n 6.4842, 6.4040, 6.5354, 6.6658, 6.7952, 6.9237, 7.0513, 6.9714, 6.8922,\n 6.8138, 6.7361, 6.8624, 6.9879, 7.1125, 7.2363, 7.1590, 7.0823, 7.2051,\n 7.1291, 7.0537, 6.9789, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.5933, 7.5204, 7.4482, 7.3765, 7.3054, 7.4215,\n 7.5369, 7.6517, 7.7658, 7.6950, 7.6246, 7.7380, 7.6681, 7.5988, 7.5299,\n 7.4616, 7.5740, 7.5061, 7.4386, 7.3717, 7.4833, 7.5944, 7.7048, 7.8147,\n 7.9241, 7.8572, 7.7908, 7.7249, 7.6594, 7.7679, 7.8759, 7.9833, 8.0902,\n 8.0249, 7.9601, 8.0663, 8.0018, 7.9377, 7.8740, 7.8107, 7.9162, 8.0212,\n 8.1258, 8.2298, 8.1667, 8.2702, 8.2074, 8.3103, 8.4128, 8.3503, 8.2882,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's a cookie-cutter movie , a cut-and-paste job . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.3195, -1.1794, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "200", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.5%", + "z-score": "11.6", + "p value": "2.2e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.1779, 4.0451, 3.9158, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.3970, 4.2844, 4.4721, 4.3614, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.2615, 5.1640,\n 5.0679, 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.7566, 5.6622,\n 5.5691, 5.7207, 5.8707, 5.7785, 5.6875, 5.8358, 5.7458, 5.6569,\n 5.8035, 5.7155, 5.8606, 5.7735, 5.6874, 5.6023, 5.7457, 5.6614,\n 5.8034, 5.7199, 5.8605, 6.0000, 6.1383, 6.0553, 6.1924, 6.3283,\n 6.4632, 6.5970, 6.5144, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.8757, 6.7952, 6.9237, 7.0513, 7.1779, 7.3037, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 8.0403, 7.9600, 8.0798, 8.0002, 8.1192,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.6963, 8.6190, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.5695, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.5577, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.3999, 10.5001, 10.5998, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006, 11.5943])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ni had to look away - this was god awful . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "12.6%", + "z-score": "-4.05", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.1889, -3.2276, -3.2660,\n -3.3041, -3.3420, -3.3797, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.6210, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.7117, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.7196, -3.5725, -3.6067, -3.6407, -3.6745, -3.7082, -3.7417,\n -3.7750, -3.8081, -3.8411, -3.8740, -3.9067, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.8194, -3.8516, -3.8838, -3.9158,\n -3.9476, -3.8100, -3.8420, -3.8739, -3.9056, -3.9372, -3.9687, -3.8333,\n -3.8649, -3.8964, -3.9278, -3.9590, -3.9900, -4.0210, -4.0518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.5396, 8.4444, 8.5715, 8.4774, 8.6035, 8.5105,\n 8.6357, 8.5437, 8.4526, 8.3625, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.0595, 9.1735, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.5840, 9.6921, 9.7997, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.7451, 10.6722, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.8505, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.1919, 11.1218, 11.0521, 11.1475, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthanks to scott 's charismatic roger and eisenberg 's sweet nephew , roger dodger is one of the most compelling variations on in the company of men . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 1.2189, 1.4027, 1.5843, 1.7638, 1.6908, 1.6187, 1.7951, 1.7233,\n 1.8972, 2.0692, 2.2393, 2.1669, 2.0954, 2.0247, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 2.0785, 2.0107, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 2.1822,\n 2.1182, 2.2711, 2.4227, 2.3586, 2.2952, 2.2323, 2.3817, 2.3190,\n 2.2569, 2.4045, 2.3426, 2.2813, 2.2205, 2.3660, 2.3054, 2.2454,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.3529, 2.2943, 2.4348, 2.5744,\n 2.7129, 2.8505, 2.7913, 2.7325, 2.8687, 2.8101, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.0952, 3.0373, 3.1690, 3.1113, 3.0540, 2.9971,\n 2.9406, 3.0706, 3.0143, 3.1433, 3.2715, 3.2152, 3.1593, 3.1038,\n 3.0486, 2.9938, 3.1203, 3.0657, 3.0114, 3.1368, 3.0827, 3.2071,\n 3.3309, 3.2768, 3.2230, 3.1696, 3.2921, 3.2389, 3.3606, 3.3075,\n 3.2547, 3.2023, 3.1502, 3.2705, 3.2186, 3.1669, 3.2863, 3.2348,\n 3.1836, 3.1327, 3.2509, 3.2002, 3.3177, 3.4346, 3.5509, 3.5000,\n 3.4494, 3.5648, 3.5143, 3.6291, 3.7432, 3.8569, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641, 3.2206, 2.9938,\n 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712, 3.5796, 3.8497, 4.1111,\n 4.3644, 4.1812, 4.0056, 3.8367, 4.0825, 3.9196, 3.7626, 4.0012, 4.2339,\n 4.4610, 4.3083, 4.5301, 4.7469, 4.5985, 4.8107, 5.0186, 5.2223, 5.4222,\n 5.6183, 5.8108, 5.6667, 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997,\n 5.6830, 5.5549, 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828,\n 6.0622, 5.9438, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.5500, 5.4433,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968, 6.0943,\n 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.2517, 6.4008, 6.5483, 6.6944,\n 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.0711, 7.2104, 7.3485, 7.2532,\n 7.3901, 7.5258, 7.6603, 7.7937, 7.9259, 8.0571, 8.1873, 8.3164, 8.2222,\n 8.1291, 8.2572, 8.3843, 8.2923, 8.2012, 8.1111, 8.2372, 8.3625, 8.2733,\n 8.3976, 8.5210, 8.6436, 8.7652, 8.6770, 8.5896, 8.7104, 8.8304, 8.7439,\n 8.6581, 8.7773, 8.8958, 8.8108, 8.7267, 8.8443, 8.9612, 8.8778, 8.7952,\n 8.9113, 9.0267, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.1452, 9.0679, 9.1785, 9.2885,\n 9.2118, 9.1357, 9.2450, 9.3537, 9.2782, 9.2032, 9.3113, 9.4188, 9.3443,\n 9.2704, 9.3774, 9.4837, 9.4103, 9.3374, 9.4432, 9.5485, 9.4761, 9.4042,\n 9.5089, 9.6130, 9.5416, 9.4707, 9.5743, 9.6774, 9.6069, 9.5369, 9.6394,\n 9.7415, 9.6719, 9.6028, 9.7043, 9.8054, 9.7367, 9.6684, 9.7690, 9.8691,\n 9.8012, 9.7337, 9.8333, 9.9325, 9.8654, 9.7987, 9.8974, 9.9957, 9.9294,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n... designed to provide a mix of smiles and tears , `` crossroads '' instead provokes a handful of unintentional howlers and numerous yawns . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "28.0%", + "z-score": "0.726", + "p value": "0.234", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, 0.1143, 0.2844, 0.4529,\n 0.6198, 0.7851, 0.7256])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.3283, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 8.9456, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.5230, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.0673, 9.9333, 10.0664, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.5838, 10.4565, 10.5830, 10.7084, 10.8328, 10.9560, 11.0782,\n 11.1994, 11.0762, 10.9546, 11.0755, 11.1954, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.5476, 11.4311, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.0005, 11.8885, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.4370, 12.3289, 12.4365, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 12.8586, 12.7542, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.3690, 13.2669, 13.1657, 13.2665, 13.3667, 13.4664, 13.5655, 13.6640,\n 13.7621, 13.6630, 13.5647, 13.6626, 13.7599, 13.8567, 13.9531, 14.0489,\n 14.1442, 14.0479, 13.9524, 14.0475, 14.1422, 14.2364, 14.3302, 14.4234,\n 14.5162, 14.4225, 14.3295, 14.4222, 14.5144, 14.6062, 14.6976, 14.7885,\n 14.8790, 14.7877, 14.6970, 14.7874, 14.8773, 14.9669, 15.0560, 15.1448,\n 15.2332, 15.1440, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.4922, 15.4057, 15.4919, 15.5778, 15.6634, 15.7485, 15.8334,\n 15.9179, 15.8327, 15.7481, 15.8325, 15.9165, 16.0002, 16.0836, 16.1667,\n 16.2494, 16.1660, 16.0832, 16.1658, 16.2481, 16.3301, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na gorgeous , witty , seductive movie . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "0.5%", + "z-score": "-7.98", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.4157, -3.4641, -3.5119, -3.5590, -3.6056, -3.6515,\n -3.6968, -3.7417, -3.7859, -3.8297, -3.8730, -3.9158, -3.9581, -4.0000,\n -4.0415, -4.0825, -4.1231, -4.1633, -4.2032, -4.2426, -4.2817, -4.3205,\n -4.3589, -4.3970, -4.4347, -4.4721, -4.5092, -4.5461, -4.5826, -4.6188,\n -4.6547, -4.6904, -4.7258, -4.7610, -4.7958, -4.8305, -4.8648, -4.8990,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.8636, -5.8919,\n -5.9201, -5.9481, -5.9760, -6.0038, -6.0315, -6.0590, -6.0864, -6.1137,\n -6.1409, -6.1680, -6.1949, -6.2217, -6.2484, -6.2750, -6.3015, -6.3278,\n -6.3541, -6.3803, -6.4063, -6.4322, -6.4581, -6.4838, -6.5094, -6.5350,\n -6.5604, -6.5857, -6.6110, -6.6361, -6.6612, -6.6861, -6.7110, -6.7358,\n -6.7604, -6.7850, -6.8095, -6.8339, -6.8583, -6.8825, -6.9067, -6.9307,\n -6.9547, -6.9786, -7.0025, -7.0262, -7.0499, -7.0735, -7.0970, -7.1204,\n -7.1437, -7.1670, -7.1902, -7.2134, -7.2364, -7.2594, -7.2823, -7.3051,\n -7.3279, -7.3506, -7.3732, -7.3958, -7.4183, -7.4407, -7.4631, -7.4853,\n -7.5076, -7.5297, -7.5518, -7.5738, -7.5958, -7.6177, -7.6395, -7.6613,\n -7.6830, -7.7047, -7.7263, -7.7478, -7.7693, -7.7907, -7.8120, -7.8333,\n -7.8546, -7.8758, -7.8969, -7.9179, -7.9390, -7.9599, -7.9808])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 6.5997,\n 6.8127, 7.0201, 6.7778, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.3557, 8.1763, 8.3423, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.4857, 8.6424, 8.7970, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 8.9455, 9.0924, 8.9489,\n 9.0947, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.3927, 9.5304, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.1024, 10.2283, 10.3532, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.7429, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.6242, 11.7320, 11.8392,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.0902, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.6713, 12.7690, 12.8661, 12.9628,\n 13.0590, 12.9732, 13.0690, 13.1644, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.4758, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.6896, 13.7803, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.0000,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nif the movie succeeds in instilling a wary sense of ` there but for the grace of god , ' it is far too self-conscious to draw you deeply into its world . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.1790, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.7242, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.7382, 5.6401, 5.7955, 5.6986, 5.8522, 5.7566, 5.9084,\n 6.0587, 5.9641, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.5444, 11.4674, 11.3910, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.0209, 12.1141, 12.2068, 12.2992, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit does n't believe in itself , it has no sense of humor ... it 's just plain bored . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.7488, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.6710, -1.5181, -1.5614, -1.6045, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 7.7784, 7.9216, 8.0632, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.5002, 10.4164, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na sequence of ridiculous shoot - 'em - up scenes . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.9853, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.3289, 10.2516, 10.3557, 10.2790, 10.3827, 10.3065, 10.2310, 10.3341,\n 10.2591, 10.1846, 10.2872, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.7451, 10.8444, 10.7714, 10.6990, 10.6271, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 10.9299, 11.0261, 10.9564, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe weight of the piece , the unerring professionalism of the chilly production , and the fascination embedded in the lurid topic prove recommendation enough . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "10.1%", + "z-score": "-4.87", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -1.9702, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.9814, -3.0308, -2.7863, -2.8368, -2.8868,\n -2.9361, -2.9848, -3.0330, -2.8006, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.2863, -3.3288, -3.3708, -3.4125, -3.4538, -3.4949, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.3156, -3.1288, -3.1704,\n -3.2116, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.3918, -3.4308, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.6216, -3.6590, -3.6961, -3.7330, -3.7697, -3.8061, -3.8424, -3.8784,\n -3.9141, -3.7471, -3.7832, -3.8191, -3.8548, -3.8903, -3.9255, -3.7626,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.8837, -3.9181, -3.9524, -3.9865, -4.0204, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.2200, -4.2527, -4.2852, -4.3176, -4.3498, -4.3818,\n -4.4137, -4.4454, -4.4769, -4.3280, -4.3598, -4.3915, -4.4230, -4.4544,\n -4.4856, -4.3395, -4.1944, -4.2262, -4.2578, -4.2893, -4.3207, -4.3519,\n -4.3830, -4.4140, -4.4448, -4.4754, -4.5060, -4.5364, -4.5666, -4.5968,\n -4.6268, -4.6567, -4.6864, -4.7161, -4.7456, -4.7749, -4.8042, -4.8333,\n -4.8624, -4.8913, -4.9200, -4.9487, -4.9773, -4.8416, -4.8703])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.1603, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.1471, 6.2994, 6.4501, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.5504, 6.4566, 6.3640,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.2443, 7.1563, 7.0692, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.6315, 7.5484, 7.6734, 7.7976, 7.7152, 7.6335, 7.5526,\n 7.6758, 7.7981, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.1585, 8.0801, 8.1976, 8.3143, 8.2365, 8.1594, 8.0829,\n 8.1988, 8.3140, 8.4286, 8.5424, 8.6556, 8.5796, 8.6921, 8.8039,\n 8.7284, 8.6535, 8.5792, 8.6903, 8.8008, 8.7270, 8.6537, 8.5810,\n 8.6908, 8.8000, 8.9086, 9.0167, 9.1242, 9.0518, 9.1587, 9.2651,\n 9.1932, 9.1218, 9.0510, 9.1567, 9.2619, 9.1915, 9.1215, 9.0520,\n 9.1566, 9.2607, 9.3642, 9.4673, 9.5698, 9.5007, 9.6028, 9.7043,\n 9.6356, 9.5673, 9.4995, 9.6005, 9.7011, 9.6336, 9.5666, 9.5000,\n 9.6000, 9.6996, 9.7987, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n( w ) hile long on amiable monkeys and worthy environmentalism , jane goodall 's wild chimpanzees is short on the thrills the oversize medium demands . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.3698, 0.3225, 0.4593, 0.4121, 0.5477,\n 0.5005, 0.6351, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.7816, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.7242, 5.8835, 5.7812, 5.9386,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.2075, 6.1128, 6.2601, 6.4059, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.8819, 7.0211, 6.9282, 7.0662, 7.2029, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.7200, 11.8176, 11.9147, 12.0114,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.3523, 12.4460, 12.5394, 12.6323, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.9410, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nas surreal as a dream and as detailed as a photograph , as visually dexterous as it is at times imaginatively overwhelming . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -0.8811, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998, 3.5796, 3.8497, 4.1111,\n 3.9279, 3.7524, 4.0056, 3.8367, 3.6742, 3.9196, 4.1586, 4.3916, 4.6188,\n 4.4610, 4.3083, 4.5301, 4.3818, 4.5985, 4.4544, 4.3142, 4.1779, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.1854,\n 5.0602, 5.2463, 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919,\n 5.7735, 5.9438, 5.8275, 5.9954, 5.8812, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.1450, 6.0410, 5.9386, 5.8377,\n 5.7382, 5.8936, 5.7955, 5.9491, 5.8522, 5.7566, 5.6622, 5.8139, 5.7207,\n 5.6286, 5.7785, 5.9270, 6.0740, 6.2197, 6.1283, 6.0380, 6.1820, 6.0927,\n 6.2354, 6.1470, 6.0596, 5.9732, 6.1143, 6.0288, 6.1685, 6.3070, 6.4444,\n 6.5807, 6.7159, 6.8500, 6.9830, 6.8977, 6.8133, 6.7298, 6.6471, 6.7788,\n 6.6968, 6.8274, 6.7462, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.4807,\n 6.6089, 6.7361, 6.8624, 6.9879, 6.9107, 6.8343, 6.9587, 6.8828, 6.8076,\n 6.7330, 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 7.0737, 7.0007, 7.1207,\n 7.0481, 6.9762, 6.9048, 7.0238, 6.9529, 7.0711, 7.1886, 7.3054, 7.4215,\n 7.5369, 7.6517, 7.5809, 7.5106, 7.4409, 7.5548, 7.6681, 7.5988, 7.5299,\n 7.6424, 7.7544, 7.8657, 7.7971, 7.7291, 7.8397, 7.7720, 7.8820, 7.8147,\n 7.7480, 7.6816, 7.7908, 7.7249, 7.8335, 7.9415, 8.0490, 8.1560, 8.2624,\n 8.3683, 8.4736, 8.4078, 8.3423, 8.2773, 8.2127, 8.3173, 8.2531, 8.3572,\n 8.2933, 8.2298, 8.1667, 8.2702, 8.2074, 8.1449, 8.0829, 8.1858, 8.2882,\n 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nescaping the studio , piccoli is warmly affecting and so is this adroitly minimalist movie . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.6432, -1.5055, -1.5453, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.6843, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962, 4.9010, 4.6268,\n 4.9008, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426, 4.5033, 4.3027, 4.1111,\n 4.3644, 4.6101, 4.4272, 4.2515, 4.4907, 4.7237, 4.5547, 4.7819, 4.6188,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.9592, 4.8107, 5.0186, 4.8742, 4.7336,\n 4.5968, 4.8003, 5.0000, 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997,\n 5.3716, 5.5549, 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738,\n 6.6395, 6.8031, 6.6803, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296, 7.1207,\n 7.0133, 7.1611, 7.0553, 6.9511, 6.8483, 6.7469, 6.6469, 6.7931, 6.9378,\n 6.8391, 6.7416, 6.6454, 6.5504, 6.4566, 6.5997, 6.5069, 6.6486, 6.7890,\n 6.9282, 6.8364, 6.7456, 6.8834, 6.7937, 6.7049, 6.6171, 6.5303, 6.4444,\n 6.5807, 6.7159, 6.6308, 6.5465, 6.4632, 6.3807, 6.2991, 6.4327, 6.3517,\n 6.4842, 6.6157, 6.7462, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.4807,\n 6.4039, 6.3278, 6.4558, 6.5828, 6.5072, 6.4322, 6.3580, 6.2843, 6.2113,\n 6.3369, 6.2644, 6.3892, 6.5130, 6.6361, 6.5639, 6.4923, 6.6144, 6.5433,\n 6.4728, 6.4028, 6.3333, 6.2644, 6.3853, 6.5054, 6.4368, 6.3688, 6.3013,\n 6.2342, 6.1677, 6.2866, 6.2205, 6.3385, 6.4559, 6.5727, 6.5067, 6.4413,\n 6.5571, 6.4920, 6.4274, 6.3632, 6.2994, 6.2361, 6.3509, 6.4650, 6.4019,\n 6.3392, 6.2770, 6.2152, 6.1537, 6.2668, 6.2057, 6.3180, 6.4298, 6.5410,\n 6.4800, 6.4194, 6.5299, 6.4695, 6.4096, 6.3500, 6.2908, 6.2319, 6.3414,\n 6.4504, 6.3917, 6.3333, 6.2753, 6.2177, 6.1604, 6.2684, 6.2113, 6.3187,\n 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthere 's ... tremendous energy from the cast , a sense of playfulness and excitement that seems appropriate . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.7457,\n -1.5751, -1.4059, -1.4536, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.6881, -0.7346, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, 0.0000, -0.0429, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.3287, 10.4565, 10.5830, 10.7084, 10.8328, 10.9560, 10.8321,\n 10.9546, 11.0762, 11.1967, 11.3163, 11.4349, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.8953, 11.7779, 11.8918, 12.0049, 12.1171, 12.2286,\n 12.3393, 12.2248, 12.3350, 12.4444, 12.5531, 12.6611, 12.7683, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.0748, 13.1785, 13.2816,\n 13.3840, 13.4859, 13.5871, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.2581,\n 14.3540, 14.4493, 14.5442, 14.6385, 14.7324, 14.6319, 14.7255, 14.8187,\n 14.9113, 15.0035, 15.0952, 14.9967, 15.0882, 15.1792, 15.2698, 15.3600,\n 15.4498, 15.3530, 15.4425, 15.5316, 15.6203, 15.7086, 15.7965, 15.7014,\n 15.7890, 15.8763, 15.9632, 16.0497, 16.1358, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.4684, 16.3764, 16.4607, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.7039, 16.7866, 16.8690, 16.9511, 17.0328, 17.1143, 17.0251,\n 17.1064, 17.1873, 17.2680, 17.3483, 17.4284, 17.3406, 17.4204, 17.5000,\n 17.5793, 17.6583, 17.7370, 17.6504, 17.7290, 17.8072, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthis illuminating documentary transcends our preconceived vision of the holy land and its inhabitants , revealing the human complexities beneath . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.6222, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.2503, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.3698, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.1761, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.9601, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 8.8889, 9.0139, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.3409, 10.2514, 10.3630, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.5397, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.5002, 10.4164, 10.3333, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.0004, 11.1018, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.2068, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe subtle strength of `` elling '' is that it never loses touch with the reality of the grim situation . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.9%", + "z-score": "-0.667", + "p value": "0.748", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.6667])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547, 1.5403, 1.9052,\n 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998, 3.5796, 3.8497, 3.6667,\n 3.4915, 3.7524, 3.5839, 3.4219, 3.6742, 3.9196, 4.1586, 4.0012, 4.2339,\n 4.0814, 3.9337, 3.7905, 4.0166, 4.2378, 4.4544, 4.6664, 4.5260, 4.7336,\n 4.9373, 5.1371, 5.3333, 5.1962, 5.3889, 5.2549, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.0034, 4.8857, 4.7703, 4.6571, 4.5461, 4.4371,\n 4.6188, 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.8305, 5.0019, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 5.1978, 5.3605, 5.5213, 5.4222, 5.3245,\n 5.2281, 5.1332, 5.2915, 5.4482, 5.3541, 5.5090, 5.4160, 5.5691, 5.7207,\n 5.8707, 6.0193, 5.9270, 6.0740, 5.9827, 6.1283, 6.2725, 6.4153, 6.5569,\n 6.6973, 6.8364, 6.9743, 6.8834, 6.7937, 6.7049, 6.8414, 6.9768, 6.8889,\n 7.0231, 7.1563, 7.2884, 7.4194, 7.3322, 7.4622, 7.5912, 7.7192, 7.8463,\n 7.9724, 7.8859, 8.0111, 7.9254, 8.0497, 7.9649, 7.8808, 8.0042, 7.9209,\n 8.0434, 8.1650, 8.2858, 8.4057, 8.3231, 8.4423, 8.3605, 8.2793, 8.3977,\n 8.3172, 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.2365, 8.1594, 8.2754,\n 8.3906, 8.5052, 8.4286, 8.3526, 8.4664, 8.3910, 8.5041, 8.4293, 8.5417,\n 8.4674, 8.3937, 8.3205, 8.2479, 8.3595, 8.4706, 8.3984, 8.5088, 8.4371,\n 8.3660, 8.4757, 8.5848, 8.6933, 8.6226, 8.5524, 8.4826, 8.4133, 8.5212,\n 8.6284, 8.5595, 8.6662, 8.7724, 8.8780, 8.8094, 8.7414, 8.8464, 8.7788,\n 8.7116, 8.6448, 8.7492, 8.6828, 8.6169, 8.5513, 8.6551, 8.7584, 8.6932,\n 8.7959, 8.7311, 8.6667, 8.7689, 8.8706, 8.9718, 9.0726, 9.1730, 9.1088,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nholm ... embodies the character with an effortlessly regal charisma . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 4.9640, 4.6291, 4.3231, 4.0415, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.4017, 3.2004, 3.0096, 3.2998, 3.1177, 3.3968, 3.6667,\n 3.9279, 4.1812, 4.4272, 4.6663, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.6000, 5.4322, 5.2697, 5.1121, 4.9592, 4.8107, 4.6664, 4.5260, 4.3894,\n 4.2563, 4.4634, 4.3333, 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997,\n 5.6830, 5.8635, 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.0125, 5.8919,\n 5.7735, 5.6573, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132, 6.8641,\n 6.7583, 6.9076, 6.8034, 6.7006, 6.5993, 6.4993, 6.4008, 6.3035, 6.2075,\n 6.1128, 6.0193, 6.1664, 6.0740, 6.2197, 6.3640, 6.5069, 6.6486, 6.7890,\n 6.9282, 7.0662, 7.2029, 7.3386, 7.4730, 7.3810, 7.5143, 7.4233, 7.3333,\n 7.2443, 7.1563, 7.0692, 6.9830, 6.8977, 6.8133, 6.9451, 6.8615, 6.7788,\n 6.6968, 6.8274, 6.7462, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.4807,\n 6.6089, 6.5320, 6.4558, 6.3803, 6.5072, 6.4322, 6.3580, 6.2843, 6.4101,\n 6.3369, 6.2644, 6.1926, 6.3172, 6.2458, 6.1750, 6.1047, 6.2282, 6.1584,\n 6.0892, 6.0205, 6.1429, 6.0746, 6.0069, 5.9397, 6.0609, 5.9941, 5.9279,\n 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.9065, 5.8424, 5.7787, 5.7155,\n 5.8336, 5.7707, 5.7082, 5.6462, 5.7633, 5.7016, 5.6403, 5.5794, 5.6955,\n 5.6349, 5.5747, 5.5149, 5.6300, 5.5705, 5.5113, 5.4526, 5.5668, 5.5082,\n 5.4501, 5.3923, 5.5056, 5.4480, 5.3909, 5.3340, 5.4464, 5.3898, 5.3335,\n 5.2776, 5.3891, 5.3333, 5.2779, 5.2229, 5.3335, 5.2786, 5.2241, 5.1698,\n 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe title not only describes its main characters , but the lazy people behind the camera as well . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -1.8808, -1.9242, -1.9673, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.7701, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.7908, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.3289, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.2129, 11.3091, 11.2376, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit offers little beyond the momentary joys of pretty and weightless intellectual entertainment . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.0667, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.6992, -2.7406,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -2.9659, -3.0039, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.1433, -3.1794, -3.2152, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.2107, -3.2460, -3.1013, -3.1368, -3.1720, -3.2071,\n -3.2420, -3.2768, -3.3113, -3.3457, -3.3799, -3.4140, -3.4478, -3.4816,\n -3.5151, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.5424, -3.5753,\n -3.4383, -3.4713, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.0456, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.7533, 11.6584, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.4286, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.3537, 13.4477, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.4850, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na synthesis of cliches and absurdities that seems positively decadent in its cinematic flash and emptiness . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.1380, -1.1784, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -0.9816, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.3901, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 7.8065, 7.7192, 7.8463, 7.7598, 7.6742, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.5910, 7.5094, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 8.0403, 7.9600, 8.0798, 8.1989, 8.1192,\n 8.0402, 7.9619, 7.8842, 7.8072, 7.9253, 8.0427, 7.9663, 7.8905,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.2772, 8.3910, 8.3162, 8.4293,\n 8.3550, 8.4674, 8.5792, 8.6903, 8.6165, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.9086, 9.0167, 9.1242, 9.2311, 9.3374, 9.4432,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.2619, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.5369, 9.4673, 9.3982, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.7690, 9.7011, 9.6336, 9.7337, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.2273, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na subtle and well-crafted ( for the most part ) chiller . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, 0.0000,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.6287, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.9337, 13.0311, 12.9430, 13.0400, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.1644, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.5499, 13.6427, 13.5589, 13.6514, 13.7434, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.9086, 14.9950, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.3454, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nhas a lot of the virtues of eastwood at his best . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.0476, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.0451, 3.9158, 4.1265, 4.0000,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.6571, 4.5461, 4.4371, 4.6188,\n 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 5.8377, 5.7382, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.8849, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.0211, 6.9282, 6.8364, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 7.9724, 7.8859, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.8978, 8.8179, 8.9319, 9.0452,\n 8.9660, 9.0786, 9.1905, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.6008, 9.7072, 9.8131, 9.7380, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.0814, 10.0074, 10.1106, 10.2132, 10.3154, 10.2419, 10.1690, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's hampered by a lifetime-channel kind of plot and a lead actress who is out of her depth . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "12.1%", + "z-score": "-4.22", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -2.9109, -2.9515,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.7196, -3.7534, -3.7870, -3.6407, -3.6745, -3.7082, -3.7417,\n -3.7750, -3.8081, -3.8411, -3.6979, -3.7311, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.9910, -4.0228, -4.0545, -4.0860,\n -4.1174, -3.9793, -4.0109, -4.0423, -4.0736, -4.1048, -4.1358, -4.1667,\n -4.1974, -4.0622, -4.0931, -4.1239, -4.1546, -4.1851, -4.2155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.1333, 9.2717, 9.1455, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.2564, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.7249, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.1957, 13.2895, 13.2068, 13.1246, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.7106, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit feels like an after-school special gussied up with some fancy special effects , and watching its rote plot points connect is about as exciting as gazing at an egg timer for 93 minutes . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.7772, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -1.9211, -1.9599,\n -1.9985, -2.0369, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.0219, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 11.9213, 12.0286, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.7755, 12.8766, 12.7812, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.4780, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.1510, 14.2433, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.7023, 14.7916, 14.7049, 14.7939, 14.8825, 14.9707,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.4085, 15.4940, 15.4103, 15.4956, 15.5805, 15.6651, 15.7494, 15.8333,\n 15.9169, 16.0002, 15.9178, 16.0009, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfor the most part , director anne-sophie birot 's first feature is a sensitive , extraordinarily well-acted drama . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.2955, -0.3369, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.0178, 7.8360, 8.0076, 8.1763, 8.3423, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 9.9540, 9.8072, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.4667, 10.3314, 10.4614, 10.5903, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.3308, 11.4512, 11.3244,\n 11.1994, 11.3196, 11.4388, 11.3163, 11.4349, 11.3143, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.4311, 11.5470, 11.6620, 11.7762, 11.6620, 11.7757,\n 11.8885, 12.0005, 11.8885, 12.0000, 12.1107, 12.2207, 12.1107, 12.0020,\n 12.1117, 12.0044, 12.1136, 12.2221, 12.1164, 12.2244, 12.1200, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.2360, 12.3419, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.3810, 13.4780, 13.5746, 13.6707, 13.7663, 13.6742,\n 13.7694, 13.6781, 13.7730, 13.8675, 13.7772, 13.6876, 13.5987, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.7054, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.1842, 14.2744,\n 14.3642, 14.2805, 14.3700, 14.4591, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.7348, 14.8219, 14.9086, 14.8274, 14.9139, 14.8333,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nmr. tsai is a very original artist in his medium , and what time is it there ? \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.9488, 1.1111, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 1.1877, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.6646, 1.6081, 1.5519, 1.4963, 1.6398, 1.7823,\n 1.9237, 2.0642, 2.0078, 1.9518, 2.0907, 2.0349, 1.9795, 1.9245,\n 2.0617, 2.1980, 2.3333, 2.2780, 2.2230, 2.1685, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.4037, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.9599,\n 1.9097, 2.0369, 2.1634, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 1.8935, 1.8453, 1.7974, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "55.6%", + "z-score": "9.9", + "p value": "2.14e-23", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.7757, 2.9856, 2.8804, 2.7775,\n 2.6765, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.0531, 4.2222, 4.3894,\n 4.2981, 4.4630, 4.6262, 4.7875, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.6603, 4.5747, 4.4901, 4.4066, 4.5611, 4.4783,\n 4.3966, 4.5491, 4.7001, 4.6188, 4.7682, 4.9163, 5.0630, 4.9820,\n 5.1273, 5.2713, 5.4140, 5.5556, 5.6959, 5.6149, 5.7540, 5.8919,\n 5.8114, 5.7318, 5.8684, 6.0038, 5.9247, 6.0590, 6.1923, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.8439, 6.7648, 6.8922, 7.0187, 6.9402,\n 7.0658, 6.9879, 7.1125, 7.0353, 6.9587, 6.8828, 6.8076, 6.7330,\n 6.8564, 6.9789, 7.1007, 7.2217, 7.3419, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.7407, 7.6667, 7.7831, 7.7096, 7.8253, 7.7524, 7.8673,\n 7.9816, 7.9091, 8.0227, 8.1356, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.1448, 8.2557, 8.3660, 8.4757, 8.5848, 8.6933, 8.6226, 8.7305,\n 8.8379, 8.9447, 9.0510, 8.9806, 8.9107, 9.0164, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.5369, 9.4673, 9.5698, 9.6719, 9.6028, 9.5341,\n 9.4658, 9.3980, 9.4995, 9.6005, 9.7011, 9.8012, 9.9008, 9.8333,\n 9.9325, 10.0312, 9.9641, 9.8974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nsade is an engaging look at the controversial eponymous and fiercely atheistic hero . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.5820, 2.3938, 2.2156, 2.5281, 2.8284, 2.6558, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 3.0072, 3.2660, 3.1156, 3.3665, 3.2205, 3.0792,\n 2.9424, 2.8098, 2.6811, 2.9212, 3.1558, 3.0290, 3.2577, 3.1334, 3.0123,\n 2.8943, 3.1160, 3.3333, 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284,\n 3.8146, 3.7033, 3.5942, 3.4873, 3.6831, 3.5777, 3.4743, 3.6662, 3.8552,\n 3.7528, 3.6522, 3.8376, 4.0205, 3.9208, 3.8228, 4.0024, 4.1797, 4.0825,\n 4.2571, 4.1612, 4.0667, 3.9736, 3.8819, 3.7916, 3.9624, 4.1312, 4.0415,\n 4.2080, 4.1192, 4.0316, 3.9452, 4.1090, 4.2710, 4.1851, 4.1003, 4.2601,\n 4.4182, 4.5747, 4.7296, 4.6448, 4.5611, 4.4783, 4.3966, 4.5491, 4.4680,\n 4.3879, 4.5384, 4.6876, 4.6079, 4.5291, 4.6765, 4.8226, 4.7442, 4.6667,\n 4.8111, 4.9543, 4.8772, 5.0190, 4.9424, 4.8666, 4.7916, 4.7173, 4.6437,\n 4.7834, 4.9221, 4.8488, 4.9862, 4.9135, 4.8414, 4.7700, 4.9058, 5.0406,\n 4.9695, 4.8990, 5.0325, 5.1650, 5.2965, 5.4272, 5.3567, 5.2868, 5.2175,\n 5.1488, 5.2779, 5.2096, 5.1419, 5.2699, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.5138, 5.4471, 5.3810, 5.5051, 5.6285, 5.5626, 5.6851, 5.6195, 5.5544,\n 5.4899, 5.4257, 5.3621, 5.4832, 5.6036, 5.5402, 5.6598, 5.5967, 5.5340,\n 5.4718, 5.5904, 5.7082, 5.6462, 5.5846, 5.7016, 5.8179, 5.9336, 6.0487,\n 5.9871, 5.9258, 5.8650, 5.8046, 5.9186, 5.8585, 5.7987, 5.9120, 6.0246,\n 5.9651, 5.9059, 6.0177, 6.1290, 6.0700, 6.0113, 6.1219, 6.2319, 6.1734,\n 6.2828, 6.2246, 6.1667, 6.2753, 6.2177, 6.1604, 6.2684, 6.2113, 6.3187,\n 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nso devoid of any kind of intelligible story that it makes films like xxx and collateral damage seem like thoughtful treatises \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.0777, -1.1163, -0.9897, -1.0284, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.4909, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.7242, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.7382, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.9759, 6.8819, 7.0211, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.5967, 7.7268, 7.6376,\n 7.7667, 7.6785, 7.8065, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.0497, 8.1731, 8.2956, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na tender , heartfelt family drama . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.0004, 11.1018, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.4244, 11.3468, 11.4459, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.3705, 12.4638, 12.5568, 12.6494, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.6102, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\n... a hollow joke told by a cinematic gymnast having too much fun embellishing the misanthropic tale to actually engage it . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.3797, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.2955, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "200", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.5%", + "z-score": "11.9", + "p value": "4.61e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.5627, 2.4351, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.9055, 2.7852, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.2981, 5.1978, 5.3605, 5.5213, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.9853, 8.1176, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.5393, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.6397, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 11.0177, 11.1173, 11.0418, 11.1410,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.6179, 11.7120, 11.6411, 11.7347, 11.8280, 11.9208])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe cold turkey would 've been a far better title . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.7913, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.6432,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.1279, -1.1667,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "38.6%", + "z-score": "2.85", + "p value": "0.00217", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.4389, 1.6330,\n 1.8245, 2.0135, 1.9333, 1.8543, 2.0397, 2.2226, 2.4034, 2.5820,\n 2.7585, 2.9329, 2.8518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nmanages to be both repulsively sadistic and mundane . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.4288, 0.5871, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.3113, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.6896, 13.7803, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.2546, 14.1764, 14.0986, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's just disappointingly superficial -- a movie that has all the elements necessary to be a fascinating , involving character study , but never does more than scratch the surface . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -1.9215, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.4444, -2.2673, -2.3120, -2.3564, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.4467, -2.2813, -2.3238, -2.1602, -2.2030, -2.2454,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.3825, -2.2287, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.4037, -2.4421, -2.4803, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.3443, -2.3822, -2.2406, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -1.9843, -1.8527, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.4370, 7.3147, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.7242, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.1933, 11.1173, 11.0418, 11.1410,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthis is a story of two misfits who do n't stand a chance alone , but together they are magnificent . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.4949, -0.5347, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.1111, 9.2351, 9.3582, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.7224, 9.8389, 9.7473, 9.8632, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.3630, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.2348, 10.1494, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.9906, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.3249, 11.4244, 11.3468, 11.2698, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.5329, 11.6297, 11.7261, 11.6514, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.7169, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.5235, 11.4525, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nschaeffer has to find some hook on which to hang his persistently useless movies , and it might as well be the resuscitation of the middle-aged character . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 1.0719, 1.0070, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.2492, 1.4059, 1.5613, 1.5010, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.5519, 1.6958, 1.8385, 1.7823,\n 1.7264, 1.8676, 1.8119, 1.9518, 1.8962, 1.8411, 1.7864, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.7970, 1.9291, 1.8773, 1.8257,\n 1.9566, 1.9052, 2.0350, 1.9837, 1.9327, 1.8821, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.7693, 1.8935, 2.0170, 1.9686, 1.9206, 2.0430,\n 1.9950, 2.1167, 2.0688, 2.0212, 1.9738, 2.0943, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 8.9763, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.5400, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.5393, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.4533, 11.3809, 11.3091, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe primitive force of this film seems to bubble up from the vast collective memory of the combatants . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 5.8398, 6.0125, 5.8919, 6.0622,\n 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.0553, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.6466, 7.7778, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.2733, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.2118, 9.3212, 9.4299, 9.3537, 9.2782, 9.3863, 9.3113,\n 9.4188, 9.3443, 9.2704, 9.3774, 9.4837, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.4375, 9.5416, 9.4707, 9.4002,\n 9.5038, 9.6069, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.2050, 10.1363, 10.0679, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\non this tricky topic , tadpole is very much a step in the right direction , with its blend of frankness , civility and compassion . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.7%", + "z-score": "-0.0821", + "p value": "0.533", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.3802, 0.3244, 0.4845, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 6.9076, 7.0553, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.4233, 7.5556, 7.6867, 7.8168, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 9.0060, 8.9178, 8.8304, 8.9496, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.9285, 9.0453, 9.1615, 9.0773, 8.9940, 8.9113,\n 8.8294, 8.9448, 8.8636, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 9.9562, 9.8776, 9.9846, 9.9067, 9.8293, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.8131, 9.7380, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.9038, 9.8303, 9.7574, 9.6850, 9.7886, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.7795, 10.7090, 10.8064, 10.7363, 10.6667,\n 10.5974, 10.6944, 10.6256, 10.7222, 10.8184, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe script kicks in , and mr. hartley 's distended pace and foot-dragging rhythms follow . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 1.0735, 1.0235, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.1990, 1.1547, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284, 2.6558, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.1623, 3.4219, 3.6742, 3.5176, 3.7626, 3.6108, 3.8490,\n 3.7017, 3.9337, 4.1603, 4.3818, 4.5985, 4.8107, 4.6664, 4.8742, 4.7336,\n 4.5968, 4.4634, 4.3333, 4.5363, 4.4091, 4.2848, 4.1633, 4.3618, 4.5569,\n 4.4374, 4.3205, 4.2060, 4.0937, 3.9837, 4.1740, 4.0657, 3.9595, 4.1461,\n 4.0415, 3.9386, 4.1219, 4.0205, 3.9208, 3.8228, 3.7264, 3.6315, 3.5382,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 3.7916, 3.7025, 3.6148, 3.5283,\n 3.4429, 3.6122, 3.7796, 3.6947, 3.8600, 4.0234, 4.1851, 4.1003, 4.0166,\n 4.1761, 4.0931, 4.0112, 4.1684, 4.3241, 4.2426, 4.3966, 4.3158, 4.2359,\n 4.1569, 4.3086, 4.4590, 4.3804, 4.3027, 4.4511, 4.3740, 4.2977, 4.4444,\n 4.3687, 4.5140, 4.6580, 4.5826, 4.7252, 4.8666, 4.7916, 4.9317, 4.8572,\n 4.7834, 4.7104, 4.8488, 4.9862, 5.1225, 5.2578, 5.3921, 5.3189, 5.4521,\n 5.3793, 5.3072, 5.2358, 5.1650, 5.2965, 5.2262, 5.1564, 5.0873, 5.2175,\n 5.3468, 5.2779, 5.2096, 5.1419, 5.0747, 5.0080, 5.1357, 5.0694, 5.0037,\n 5.1303, 5.0649, 5.0000, 5.1255, 5.0609, 4.9969, 4.9333, 4.8702, 4.8076,\n 4.7455, 4.8693, 4.9923, 4.9303, 5.0525, 4.9908, 4.9295, 4.8687, 4.8083,\n 4.7483, 4.6887, 4.8093, 4.9292, 4.8698, 4.9889, 5.1073, 5.2251, 5.1657,\n 5.1066, 5.2235, 5.1647, 5.1063, 5.2223, 5.3377, 5.2795, 5.3941, 5.3361,\n 5.2784, 5.2211, 5.3349, 5.4480, 5.3909, 5.3340, 5.4464, 5.3898, 5.3335,\n 5.4451, 5.3891, 5.5000, 5.6104, 5.5545, 5.6643, 5.7735, 5.7177, 5.8263,\n 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nyou wonder why enough was n't just a music video rather than a full-length movie . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.0526, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.4421, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.7894, -2.6534, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.8759, 3.7700, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 4.0024, 3.9056, 4.0825,\n 4.2571, 4.4296, 4.3333, 4.2385, 4.4083, 4.5760, 4.4820, 4.6476,\n 4.5547, 4.7181, 4.6262, 4.7875, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.8347, 4.9904, 4.9023, 5.0562, 5.2086, 5.3594, 5.5088, 5.4212,\n 5.5690, 5.7155, 5.8606, 6.0044, 5.9172, 6.0596, 5.9732, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.2222, 6.3595, 6.2755, 6.4116, 6.5465,\n 6.6804, 6.8133, 6.9451, 7.0759, 6.9923, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 7.8791, 8.0006, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.7831, 8.8978, 8.8179, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.0000, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.3537, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.7688, 9.8736, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.9740, 10.9034, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nif you 're hard up for raunchy college humor , this is your ticket right here . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.6646, 1.6081, 1.5519, 1.6958, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.4662, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.7158, 1.6641, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.7237, 1.6732, 1.6230, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.7693, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.8145, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na fast , funny , highly enjoyable movie . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.5222, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 0.8208, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.9497, 1.0820, 1.2136, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.0483, 7.9495, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.1553, 9.0680, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.4608, 9.3810, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.3827, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ngood old-fashioned slash-and-hack is back ! \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.5635, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.2197, -2.2608, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.0476, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.1640, -2.2024, -2.0613, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.3094, -2.3447, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495, 2.1170,\n 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570, 2.1939, 2.0381, 1.8889,\n 1.7457, 1.6082, 1.8974, 1.7628, 2.0412, 1.9096, 2.1783, 2.0494, 2.3094,\n 2.1831, 2.0605, 1.9415, 2.1909, 2.4345, 2.3163, 2.5533, 2.4371, 2.6681,\n 2.5538, 2.7791, 2.6667, 2.8868, 2.7761, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.5690, 2.4689, 2.6765, 2.5775, 2.7811, 2.6833, 2.8830, 2.7863, 2.6914,\n 2.5981, 2.7928, 2.9848, 2.8919, 3.0806, 2.9887, 3.1743, 3.0833, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.1789, 3.0924, 3.0071, 2.9231, 3.0984, 3.0151,\n 3.1879, 3.1052, 3.2757, 3.1937, 3.3619, 3.2806, 3.2004, 3.1211, 3.0429,\n 2.9656, 3.1300, 3.0533, 3.2157, 3.1394, 3.2998, 3.2242, 3.3826, 3.3075,\n 3.2332, 3.1597, 3.0870, 3.0151, 3.1704, 3.0989, 3.2525, 3.1814, 3.3333,\n 3.2627, 3.4130, 3.3428, 3.2733, 3.2044, 3.3526, 3.4995, 3.4308, 3.5762,\n 3.5079, 3.6519, 3.5839, 3.7265, 3.6590, 3.8002, 3.7330, 3.6664, 3.6004,\n 3.5350, 3.4701, 3.6091, 3.5446, 3.6824, 3.6181, 3.7547, 3.6908, 3.8262,\n 3.7626, 3.6995, 3.6369, 3.5748, 3.5132, 3.6466, 3.5853, 3.7176, 3.6566,\n 3.7878, 3.7270, 3.8571, 3.7966, 3.7366, 3.6770, 3.6178, 3.5590, 3.6874,\n 3.6289, 3.7563, 3.6980, 3.8244, 3.7664, 3.8919, 3.8341, 3.7766, 3.7196,\n 3.6629, 3.6067, 3.7306, 3.6745, 3.7975, 3.7417, 3.8638, 3.8081, 3.9294,\n 3.8740, 3.8189, 3.7641, 3.7097, 3.6556, 3.7755, 3.7216, 3.8406, 3.7869,\n 3.9052, 3.8516, 3.9691, 3.9158, 3.8627, 3.8100, 3.7576, 3.7055, 3.8216,\n 3.7697, 3.8851, 3.8333, 3.9481, 3.8964, 4.0105, 3.9590, 3.9078, 3.8569,\n 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthis one is definitely one to skip , even for horror movie fanatics . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.5511, 1.7150, 1.6498, 1.5852,\n 1.7467, 1.6823, 1.8419, 1.7778, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.8682, 2.0197, 1.9582, 1.8974,\n 2.0470, 1.9863, 2.1344, 2.0739, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 2.0682, 2.0101, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.9237, 1.8676, 2.0078, 1.9518, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 1.8983, 1.8446, 1.9799, 1.9263, 1.8732,\n 2.0071, 1.9540, 2.0868, 2.0339, 2.1656, 2.1128, 2.0604, 2.0083,\n 1.9566, 1.9052, 2.0350, 1.9837, 1.9327, 2.0613, 2.0105, 1.9599,\n 2.0873, 2.0369, 2.1634, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.9950, 1.9473, 2.0688, 2.0212, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 1.9068, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.0667, 7.9472, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.1343, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.9998, 10.9107, 10.8224, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.2194, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.0893, 12.0114,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.6153, 12.7082, 12.6323, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 13.0316, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfor all its impressive craftsmanship , and despite an overbearing series of third-act crescendos , lily chou-chou never really builds up a head of emotional steam . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -0.7333, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -0.8422, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "149", + "Fraction of T in Greenlist": "74.9%", + "z-score": "16.2", + "p value": "1.15e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.4983, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.3557, 8.5206, 8.6828, 8.8426, 9.0000,\n 9.1551, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.6838, 9.8254, 9.9653, 10.1036,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.4739, 10.6043, 10.4667, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 11.7978, 11.9138, 12.0289, 12.1432, 12.0208,\n 12.1346, 12.0142, 11.8953, 11.7779, 11.8918, 12.0049, 12.1171, 12.0021,\n 12.1139, 12.2248, 12.3350, 12.2222, 12.3319, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.6509, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 12.8586, 12.9624, 13.0656, 13.1681, 13.0644, 12.9616, 12.8598,\n 12.9624, 13.0643, 13.1657, 13.0655, 13.1665, 13.2669, 13.3667, 13.2680,\n 13.3674, 13.2698, 13.3689, 13.4674, 13.5654, 13.6629, 13.7599, 13.6640,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 13.8593, 13.9543, 14.0488,\n 14.1428, 14.0503, 13.9585, 13.8675, 13.9615, 14.0550, 14.1481, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.2464, 14.3380, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.6155, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.4085, 15.3247, 15.4103, 15.4956, 15.5805, 15.4976, 15.5823, 15.6667,\n 15.7507, 15.8344, 15.9178, 16.0009, 16.0836, 16.1660, 16.2481])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nexquisitely nuanced in mood tics and dialogue , this chamber drama is superbly acted by the deeply appealing veteran bouquet and the chilling but quite human berling . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "105", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "25.7%", + "z-score": "0.169", + "p value": "0.433", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.0574, 0.1143, 0.0569, 0.2265,\n 0.1690])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "63.1%", + "z-score": "12.4", + "p value": "1.46e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.7625, 6.9488, 7.1317, 6.9631, 7.1435, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.2178, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.1651, 8.0741,\n 8.2012, 8.1111, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 9.1553, 9.0680, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.4132, 11.3378, 11.4356, 11.3608, 11.4581, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.3912])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nuses high comedy to evoke surprising poignance . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -2.2177, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -2.1678, -2.2180, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -3.0923, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -3.1604, -3.1977, -3.2348, -3.0792,\n -3.1165, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.5036, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.4879, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.8194, -3.8516, -3.8838, -3.9158,\n -3.9476, -3.8100, -3.8420, -3.7055, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.5325, -3.3990, -3.4316, -3.4641, -3.4964, -3.3645, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "162", + "Fraction of T in Greenlist": "81.4%", + "z-score": "18.4", + "p value": "1.02e-75", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.6066,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.6631, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.3747, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.6720, 13.7710, 13.8695, 13.9675, 14.0649, 14.1618, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.4433, 14.5379, 14.6319, 14.7255, 14.8187,\n 14.9113, 14.8124, 14.9048, 14.9967, 15.0882, 15.1792, 15.2698, 15.3600,\n 15.4498, 15.5391, 15.6280, 15.5316, 15.6203, 15.7086, 15.7965, 15.8840,\n 15.9711, 16.0578, 16.1441, 16.2301, 16.3156, 16.2216, 16.3070, 16.3920,\n 16.4767, 16.5610, 16.6450, 16.7286, 16.8118, 16.8948, 16.9774, 16.8855,\n 16.9680, 17.0500, 17.1318, 17.2133, 17.2944, 17.3752, 17.4557, 17.5359,\n 17.6158, 17.5260, 17.6058, 17.6852, 17.7643, 17.8432, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.1453, 18.2226, 18.2996, 18.3763])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\none of creepiest , scariest movies to come along in a long , long time , easily rivaling blair witch or the others . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "11.1%", + "z-score": "-4.54", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -4.1303, -4.1624, -4.1944, -4.2262, -4.2578, -4.2893, -4.3207, -4.3519,\n -4.2094, -4.2409, -4.2722, -4.3033, -4.3343, -4.3652, -4.3959, -4.4265,\n -4.4570, -4.4873, -4.5175, -4.5476, -4.5776, -4.4399, -4.4700, -4.5000,\n -4.5299, -4.5596, -4.5893, -4.6188, -4.4837, -4.5134, -4.5429])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.9620, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.0034, 4.8857, 4.7703, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.2532, 7.3901, 7.5258, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 7.7778, 7.9079, 7.8168, 7.7268, 7.6376,\n 7.5494, 7.4622, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.5895,\n 7.5056, 7.4225, 7.5484, 7.4661, 7.3845, 7.5094, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 7.8393, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.2760, 8.3927, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 8.9151, 9.0257, 8.9502, 9.0601, 8.9851, 8.9107, 8.8369, 8.9461,\n 8.8728, 8.8000, 8.9086, 8.8364, 8.9444, 9.0518, 9.1587, 9.0869,\n 9.1932, 9.1218, 9.2276, 9.3328, 9.2619, 9.1915, 9.2961, 9.2261,\n 9.3302, 9.4338, 9.3642, 9.4673, 9.5698, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.9060, 9.8373, 9.9374, 10.0371, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\na string of rehashed sight gags based in insipid vulgarity . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.2281, 8.3984,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 10.1585, 9.9969, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 10.7918, 10.9222, 11.0513, 10.9030, 11.0315, 11.1588,\n 11.0145, 10.8727, 11.0000, 11.1261, 10.9878, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 12.1533, 12.0289, 12.1432, 12.2565,\n 12.1346, 12.2474, 12.3595, 12.4708, 12.5812, 12.4622, 12.3447, 12.4550,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 12.8813, 12.9875, 13.0931,\n 12.9807, 13.0859, 13.1904, 13.2942, 13.3974, 13.2873, 13.1785, 13.2816,\n 13.3840, 13.4859, 13.5871, 13.6878, 13.7878, 13.6816, 13.7813, 13.8804,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.0649, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.4433, 14.5379, 14.4381, 14.5324, 14.6262,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.8990, 14.8021, 14.7060, 14.7981,\n 14.8896, 14.9808, 15.0715, 15.1618, 15.2517, 15.1574, 15.2470, 15.3362,\n 15.2430, 15.3320, 15.4206, 15.5087, 15.5965, 15.5046, 15.4135, 15.5012,\n 15.5885, 15.6754, 15.7619, 15.8481, 15.9339, 15.8443, 15.9299, 16.0151,\n 15.9264, 16.0115, 16.0961, 16.1805, 16.2644, 16.1769, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.5884, 16.5028, 16.5849, 16.6667,\n 16.5819, 16.6634, 16.7447, 16.8256, 16.9063, 16.8225, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\namong the year 's most intriguing explorations of alientation . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.3632, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.6941, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.4495, -2.4951, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -2.8701, -2.9109, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -2.9704,\n -2.8116, -2.6540, -2.6934, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -2.8472, -2.6984, -2.7361, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -2.9320, -2.9661, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 8.9178, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.0611, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.3608, 11.4581, 11.5549, 11.6514, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.8849, 11.8117, 11.9060, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe movie fails to live up to the sum of its parts . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 9.8877, 9.7980,\n 9.7091, 9.8236, 9.7356, 9.6484, 9.7622, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.6514, 11.5771,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.7169, 11.8117, 11.9060, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe son 's room is a triumph of gentility that earns its moments of pathos . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.9812, 0.9258, 0.8709, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 1.0598, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.0565, 1.0096, 1.1380, 1.0911, 1.0445,\n 0.9981, 1.1251, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.0167, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.6936,\n 10.6052, 10.5175, 10.6265, 10.5397, 10.4537, 10.3683, 10.4769, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.5366, 10.4594, 10.5625, 10.4858, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.8186, 10.9178, 11.0165, 11.1148, 11.0414, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthere is nothing outstanding about this film , but it is good enough and will likely be appreciated most by sailors and folks who know their way around a submarine . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "10.1%", + "z-score": "-4.87", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.3619, -3.4044, -3.2004,\n -3.2435, -3.2863, -3.3288, -3.3708, -3.4125, -3.2157, -3.2579, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.3156, -3.3564, -3.3968,\n -3.4370, -3.4768, -3.5163, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.5689, -3.6072, -3.4308, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.4116, -3.4499, -3.4879, -3.5256, -3.5631, -3.6004, -3.6374, -3.6742,\n -3.7108, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -3.8490,\n -3.8837, -3.9181, -3.9524, -3.9865, -4.0204, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.2200, -4.2527, -4.2852, -4.1338, -4.1666, -4.1992,\n -4.2316, -4.2639, -4.2960, -4.1477, -4.1800, -4.2122, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.3710, -4.2262, -4.2578, -4.2893, -4.3207, -4.3519,\n -4.3830, -4.4140, -4.4448, -4.4754, -4.5060, -4.5364, -4.5666, -4.5968,\n -4.6268, -4.4873, -4.5175, -4.5476, -4.5776, -4.6074, -4.6371, -4.6667,\n -4.6961, -4.7255, -4.7547, -4.7838, -4.8127, -4.8416, -4.8703])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "65.9%", + "z-score": "8.7", + "p value": "1.59e-18", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 4.6188, 4.9010, 5.1711,\n 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569, 5.4271, 5.6614, 5.8889,\n 5.6737, 5.4678, 5.2705, 5.4958, 5.3072, 5.5277, 5.3468, 5.5626, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.5727, 6.4019, 6.2361, 6.4273, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711,\n 7.2400, 7.4066, 7.2648, 7.4294, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376,\n 7.7942, 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.1881, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206, 8.4037,\n 8.5435, 8.6820, 8.5672, 8.7045])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthis is a train wreck of an action film -- a stupefying attempt by the filmmakers to force-feed james bond into the mindless xxx mold and throw 40 years of cinematic history down the toilet in favor of bright flashes and loud bangs . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.6941, -2.5318, -2.5726, -2.6131, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -3.1013, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.2023, -3.0639, -3.0984, -3.1327, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.0317, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.0290, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.7006, 6.8483, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 10.9458, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.9586, 11.8849, 11.9792, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe draw ( for `` big bad love '' ) is a solid performance by arliss howard . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.5672, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.2351, 9.3582, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.5621, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.7143, 10.8204, 10.9259, 11.0309, 10.9473, 11.0517,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.5026, 11.4244, 11.3468, 11.4459, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.6571, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.0019, 4.8990,\n 5.0680, 4.9666, 4.8667, 5.0332, 5.1978, 5.0990, 5.0017, 5.1640,\n 5.0679, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.5377, 5.4480, 5.5976, 5.5088, 5.4212,\n 5.5690, 5.7155, 5.6285, 5.5426, 5.6874, 5.6023, 5.5181, 5.6614,\n 5.8034, 5.7199, 5.6373, 5.7778, 5.6959, 5.6149, 5.7540, 5.8919,\n 5.8114, 5.7318, 5.8684, 5.7894, 5.7112, 5.8464, 5.9806, 5.9029,\n 5.8260, 5.9589, 5.8825, 5.8069, 5.9386, 6.0693, 5.9941, 5.9196,\n 6.0491, 5.9752, 5.9019, 6.0302, 6.1577, 6.0848, 6.0125, 6.1389,\n 6.0671, 5.9960, 6.1213, 6.2458, 6.1750, 6.1047, 6.2282, 6.1584,\n 6.0892, 6.2116, 6.3333, 6.4543, 6.3853, 6.5054, 6.4368, 6.3688,\n 6.4880, 6.6064, 6.5387, 6.4715, 6.5891, 6.5223, 6.4559, 6.5727,\n 6.6887, 6.8041, 6.7380, 6.8527, 6.7869, 6.7217, 6.8355, 6.9488,\n 7.0614, 6.9964, 7.1083, 7.0436, 6.9793, 7.0905, 7.2012, 7.1372,\n 7.0736, 7.1835, 7.1203, 7.0574, 7.1667, 7.2753, 7.2127, 7.1506,\n 7.2585, 7.1967, 7.1352, 7.2425, 7.3493, 7.4556, 7.3943, 7.5000,\n 7.4390, 7.3783, 7.4834, 7.5880, 7.6922, 7.6317, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\ngreen might want to hang onto that ski mask , as robbery may be the only way to pay for his next project . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.6997, 1.8838, 1.8074,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.8116,\n 1.9720, 1.9066, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.9009, 2.0548, 1.9920, 1.9298, 1.8682, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.9261, 1.8665, 1.8074, 1.9545, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.0682, 2.0101, 1.9524, 1.8953, 2.0373, 2.1783,\n 2.1210, 2.2608, 2.2037, 2.3422, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.2780, 2.4122, 2.3570, 2.4902, 2.4351,\n 2.3805, 2.3262, 2.2723, 2.2188, 2.3500, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.3443, 2.2923, 2.2406, 2.3679, 2.3163,\n 2.2650, 2.3912, 2.5166, 2.4653, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.4597, 2.5820, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.4553, 2.4065, 2.5265, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.4520, 2.4042, 2.3567, 2.4744, 2.5915, 2.5439, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.2592, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.0004, 11.1018, 11.2028, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.0937, 11.1933, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.2630, 11.3608, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.9273, 12.0209, 11.9487, 12.0419, 12.1347, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's one pussy-ass world when even killer-thrillers revolve around group therapy sessions . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.0732, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.8935, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.6292, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.6790, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.0937, 3.9837, 3.8759, 3.7700, 3.6662, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthough it 's become almost redundant to say so , major kudos go to leigh for actually casting people who look working-class . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.3736, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.2867, -0.1429, -0.1898, -0.2365, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.5579, -0.5991, -0.4695, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.0623, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.5414, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.3163, 12.2397, 12.3342, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.6323, 12.5568, 12.6494, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe band 's courage in the face of official repression is inspiring , especially for aging hippies ( this one included ) . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "12.1%", + "z-score": "-4.22", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -2.8583, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.3041, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.6210, -3.6566,\n -3.6919, -3.7270, -3.7619, -3.7966, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.6980, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.7196, -3.7534, -3.7870, -3.8205, -3.8538, -3.8869, -3.7417,\n -3.7750, -3.8081, -3.8411, -3.8740, -3.9067, -3.9392, -3.9716, -4.0038,\n -4.0359, -4.0678, -4.0996, -3.9590, -3.9910, -4.0228, -4.0545, -4.0860,\n -4.1174, -4.1487, -4.0109, -3.8739, -3.9056, -3.9372, -3.9687, -4.0000,\n -4.0312, -4.0622, -4.0931, -4.1239, -4.1546, -4.1851, -4.2155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.1654, 8.3267, 8.1684, 8.0139, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.6817, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.2719, 11.3791, 11.2877,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.5290, 11.6311, 11.5471, 11.4638, 11.5655, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.9197, 11.8393, 11.9377,\n 11.8579, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.1260, 12.2214, 12.1447, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.2954, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe movie achieves as great an impact by keeping these thoughts hidden as ... ( quills ) did by showing them . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.2349, 0.3746,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "155", + "Fraction of T in Greenlist": "77.9%", + "z-score": "17.2", + "p value": "7.86e-67", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.4579, 10.5837, 10.7084, 10.8321,\n 10.9546, 10.8327, 10.9546, 11.0755, 11.1954, 11.3143, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.6632, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.4694, 13.5693, 13.6685, 13.7672, 13.8654, 13.7642, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.0530, 14.1489, 14.2443, 14.3393, 14.4338,\n 14.3360, 14.4301, 14.5238, 14.6170, 14.7098, 14.6135, 14.7060, 14.7981,\n 14.8896, 14.9808, 14.8860, 14.9769, 15.0674, 15.1574, 15.2470, 15.1537,\n 15.2430, 15.3320, 15.4206, 15.5087, 15.4167, 15.5046, 15.5922, 15.6793,\n 15.7661, 15.6754, 15.7619, 15.8481, 15.9339, 16.0194, 15.9299, 16.0151,\n 16.1000, 16.1846, 16.2688, 16.1805, 16.2644, 16.3481, 16.4314, 16.5144,\n 16.4272, 16.5100, 16.5925, 16.6746, 16.7564, 16.6704, 16.7520, 16.8333,\n 16.9143, 16.9950, 16.9101, 16.9906, 17.0708, 17.1507, 17.2304])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthe film flat lines when it should peak and is more missed opportunity and trifle than dark , decadent truffle . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.1896, 1.0999, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.2611, 1.2060, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.3284, 1.2804, 1.2326, 1.1852, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.0814, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.1241, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.3993, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.1189, 10.0416,\n 9.9648, 10.0701, 9.9940, 10.0987, 10.2029, 10.1273, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.2132, 10.1398, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\njaglom ... put ( s ) the audience in the privileged position of eavesdropping on his characters \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.6379, -2.4495, -2.4951, -2.3094, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.9317, -2.9704,\n -2.8116, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -3.0706, -2.9216, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.1991, 11.1026, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.1640, 11.0724, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.0183, 10.9301, 10.8426, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 10.9048, 11.0102, 10.9259, 11.0309, 11.1352, 11.0517,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.7787, 11.7000, 11.6220, 11.5444, 11.6425, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 12.0209, 12.1141, 12.0419, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nfresnadillo 's dark and jolting images have a way of plying into your subconscious like the nightmare you had a week ago that wo n't go away . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.3474, -1.3859, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.2435, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 9.8015, 9.6719, 9.8058, 9.6786, 9.8116,\n 9.9433, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.5997, 10.4846, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 11.9083, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 13.8615,\n 13.9561, 13.8642, 13.9585, 14.0524, 13.9615, 13.8713, 13.7818, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.7986, 13.8914, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.2805, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.3011,\n 14.3897, 14.3087, 14.2282, 14.3166, 14.4046, 14.4923, 14.5797, 14.5000,\n 14.4208, 14.5080, 14.4294, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwe know the plot 's a little crazy , but it held my interest from start to finish . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.7450, 1.6667, 1.8543, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.6164, 1.5483, 1.7178, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.6222, 1.5613, 1.7154, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.7179, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.5191, 1.6646, 1.6081, 1.5519, 1.4963, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.7970, 1.9291, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.9837, 1.9327, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.8102, 1.9370, 2.0631, 2.0134, 1.9640, 1.9149,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.1886, 2.1398, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.0212, 1.9738, 2.0943, 2.0470, 2.0000,\n 2.1195, 2.2384, 2.1913, 2.1444, 2.0979, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 11.0177, 10.9422, 11.0418, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.2126, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's a scattershot affair , but when it hits its mark it 's brilliant . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.0371, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.6186, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.9704, 3.2205, 3.0792,\n 2.9424, 2.8098, 2.6811, 2.9212, 3.1558, 3.3853, 3.6098, 3.8297, 3.7009,\n 3.9158, 3.7897, 4.0000, 4.2064, 4.4091, 4.2848, 4.1633, 4.3618, 4.2426,\n 4.1260, 4.3205, 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 5.1326, 5.3100,\n 5.1962, 5.3709, 5.2590, 5.1490, 5.0410, 4.9348, 4.8305, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 5.1978, 5.3605, 5.2615, 5.1640, 5.3245,\n 5.4832, 5.6401, 5.7955, 5.6986, 5.6032, 5.5090, 5.6622, 5.8139, 5.7207,\n 5.6286, 5.5377, 5.4480, 5.3594, 5.5088, 5.4212, 5.3345, 5.4822, 5.3964,\n 5.3116, 5.2278, 5.3736, 5.5181, 5.6614, 5.5780, 5.4956, 5.4140, 5.3333,\n 5.4747, 5.6149, 5.7540, 5.8919, 6.0287, 5.9481, 6.0837, 6.0038, 6.1382,\n 6.0590, 6.1923, 6.1137, 6.0359, 5.9589, 5.8825, 5.8069, 5.9386, 6.0693,\n 6.1990, 6.3278, 6.2524, 6.3803, 6.5072, 6.6332, 6.5582, 6.6833, 6.6088,\n 6.5350, 6.4618, 6.3892, 6.3172, 6.2458, 6.3694, 6.4923, 6.4213, 6.5433,\n 6.4728, 6.4028, 6.3333, 6.4543, 6.5745, 6.5054, 6.4368, 6.5561, 6.6747,\n 6.7925, 6.9097, 6.8413, 6.7734, 6.7060, 6.8222, 6.9378, 6.8707, 6.8041,\n 6.7380, 6.6724, 6.6072, 6.7217, 6.6568, 6.7706, 6.8838, 6.8192, 6.7551,\n 6.6914, 6.8037, 6.9155, 7.0266, 6.9631, 6.9000, 6.8373, 6.7751, 6.8853,\n 6.9950, 7.1041, 7.2127, 7.3208, 7.2585, 7.3660, 7.3041, 7.4109, 7.3493,\n 7.4556, 7.3943, 7.3333, 7.2728, 7.2125, 7.1527, 7.2581, 7.3631, 7.4676,\n 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nhardly a masterpiece , but it introduces viewers to a good charitable enterprise and some interesting real people . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -2.8701, -2.9109, -2.9515,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.2276, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.4780, -3.5131, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.5382, -3.5725, -3.6067, -3.6407, -3.6745, -3.7082, -3.7417,\n -3.7750, -3.8081, -3.8411, -3.8740, -3.7311, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.9910, -4.0228, -3.8838, -3.9158,\n -3.9476, -3.9793, -4.0109, -4.0423, -4.0736, -4.1048, -4.1358, -4.1667,\n -4.1974, -4.2280, -4.2585, -4.2889, -4.3191, -4.3492, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.1237,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.5491, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.2121, 8.0928, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 8.8529, 8.7600, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.1615, 9.0773, 8.9940, 8.9113,\n 9.0267, 8.9448, 8.8636, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.3443, 9.2704, 9.3774, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.2619, 9.3665, 9.2961, 9.2261,\n 9.3302, 9.2607, 9.1916, 9.2952, 9.3982, 9.3295, 9.4320, 9.3638,\n 9.4658, 9.5673, 9.6684, 9.6005, 9.7011, 9.6336, 9.7337, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nyou wo n't like roger , but you will quickly recognize him . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.7085, -1.7500, -1.7913, -1.8324, -1.6859,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.0779, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.7108, 11.8151, 11.9187, 11.8287, 11.9319,\n 11.8427, 11.9455, 11.8571, 11.7696, 11.8719, 11.7851, 11.8870, 11.8010,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.7498, 11.6666, 11.5841, 11.5022,\n 11.4209, 11.3402, 11.2602, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.5394, 12.6323, 12.7248, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.2864, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nif steven soderbergh 's ` solaris ' is a failure it is a glorious failure . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.5021, 9.6309, 9.7586, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 11.0070, 10.9123, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.6206, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 11.9594, 12.0611, 12.1622, 12.0749, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.5542, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.8165, 12.9099, 13.0030, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.1233, 13.2149, 13.1376, 13.2288, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.2717, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nbyler reveals his characters in a way that intrigues and even fascinates us , and he never reduces the situation to simple melodrama . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 10.8801, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.0096, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.7279, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.6188, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.4536, 14.5426, 14.6313, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.9860, 15.0726, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.5060, 15.5900, 15.6736, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nthis riveting world war ii moral suspense story deals with the shadow side of american culture : racial prejudice in its ugly and diverse forms . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "10.6%", + "z-score": "-4.71", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.3333,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.2177, -1.9711, -2.0294, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.2819, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.6210, -3.6566,\n -3.6919, -3.7270, -3.7619, -3.7966, -3.8312, -3.8655, -3.8997, -3.9337,\n -3.9675, -4.0011, -4.0345, -4.0678, -4.1009, -4.1338, -3.9835, -4.0166,\n -4.0496, -4.0825, -4.1152, -4.1477, -4.1800, -4.2122, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.1944, -4.2262, -4.2578, -4.2893, -4.3207, -4.3519,\n -4.3830, -4.2409, -4.2722, -4.3033, -4.3343, -4.3652, -4.3959, -4.4265,\n -4.2872, -4.3180, -4.3487, -4.3792, -4.4096, -4.4399, -4.4700, -4.5000,\n -4.5299, -4.5596, -4.5893, -4.6188, -4.6482, -4.6775, -4.7066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.9373, 10.0504, 10.1627, 10.0748, 10.1865, 10.0995,\n 10.0133, 9.9278, 9.8430, 9.9542, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 9.9524, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.0631, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.9730, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nit 's difficult to imagine the process that produced such a script , but here 's guessing that spray cheese and underarm noises played a crucial role . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.3641, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.4863, 1.4335, 1.5714, 1.5187, 1.6554, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.5298, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.0123, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.7273, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.0937, 4.2844, 4.1740, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 5.9874, 5.8835, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.4059, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.4194,\n 7.5494, 7.6785, 7.5912, 7.7192, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 9.8776, 9.7997, 9.9067, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nno sophomore slump for director sam mendes , who segues from oscar winner to oscar-winning potential with a smooth sleight of hand . \nSentiment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.1674, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "65.6%", + "z-score": "10.5", + "p value": "5.18e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.6376, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\non the whole , the movie lacks wit , feeling and believability to compensate for its incessant coarseness and banality . \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.7085, -1.7500, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.8935, -1.9311, -1.9686, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -1.9267, -1.9635, -2.0000,\n -2.0364, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.2833, 10.4042, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.0357, 12.1419, 12.0433,\n 11.9457, 12.0516, 12.1568, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 13.0815, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.1905, 13.0984, 13.1962, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.5929, 13.5039, 13.5987, 13.6931,\n 13.6050, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.4126, 15.4976, 15.5823, 15.6667,\n 15.7507, 15.6686, 15.7524, 15.8359, 15.9191, 15.8378, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Classify the sentiment of the following sentence as positive or negative:\nwhy make a documentary about these marginal historical figures ? \nSentiment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 1.3472,\n 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998, 3.1177, 3.3968, 3.2222,\n 3.0551, 3.3235, 3.5839, 3.4219, 3.6742, 3.5176, 3.7626, 3.6108, 3.8490,\n 4.0814, 3.9337, 3.7905, 4.0166, 4.2378, 4.0980, 3.9620, 4.1779, 4.0451,\n 3.9158, 3.7897, 3.6667, 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.5569,\n 4.7488, 4.6291, 4.8177, 4.7002, 4.5850, 4.4721, 4.3614, 4.5461, 4.7281,\n 4.9075, 4.7980, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222, 5.5811,\n 5.4832, 5.3867, 5.5435, 5.4482, 5.3541, 5.2614, 5.1698, 5.3243, 5.4772,\n 5.6286, 5.7785, 5.9270, 5.8358, 5.9827, 5.8926, 6.0380, 5.9488, 5.8606,\n 5.7735, 5.6874, 5.8310, 5.9732, 6.1143, 6.0288, 6.1685, 6.0838, 6.0000,\n 6.1383, 6.2755, 6.1924, 6.3283, 6.2459, 6.3807, 6.5144, 6.4327, 6.5653,\n 6.6968, 6.8274, 6.9570, 7.0857, 7.0043, 6.9237, 6.8439, 6.9714, 6.8922,\n 6.8138, 6.7361, 6.6591, 6.7854, 6.9107, 7.0353, 7.1590, 7.2818, 7.2051,\n 7.3271, 7.2510, 7.3721, 7.2966, 7.2217, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.4034, 7.3312, 7.4482, 7.5644, 7.4927, 7.6082,\n 7.5369, 7.6517, 7.7658, 7.6950, 7.6246, 7.7380, 7.8507, 7.9628, 8.0742,\n 8.0042, 7.9347, 7.8657, 7.9764, 7.9078, 7.8397, 7.7720, 7.7048, 7.8147,\n 7.9241, 8.0328, 8.1410, 8.2486, 8.1817, 8.2887, 8.2221, 8.3286, 8.2624,\n 8.1966, 8.1312, 8.0663, 8.1721, 8.2773, 8.3820, 8.3173, 8.4215, 8.3572,\n 8.2933, 8.3969, 8.5000, 8.4364, 8.5390, 8.4757, 8.5778, 8.6794, 8.6164,\n 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.44, + "accuracy_with_watermark": 0.51, + "f1_without_watermark": 0.44, + "f1_with_watermark": 0.5882352941176471 + } + } + }, + "cola": { + "train": { + "results": [ + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nOur friends won't buy this analysis, let alone the next one we propose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "57.1%", + "z-score": "10.4", + "p value": "9.86e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 4.8038, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 5.1065, 5.0019, 4.8990,\n 5.0680, 4.9666, 4.8667, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058,\n 5.0679, 4.9731, 5.1332, 5.0395, 5.1977, 5.3541, 5.2614, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.7785, 5.6875, 5.5976, 5.7458, 5.6569,\n 5.8035, 5.9488, 6.0927, 6.2354, 6.1470, 6.0596, 6.2008, 6.1143,\n 6.0288, 5.9442, 6.0838, 6.0000, 6.1383, 6.2755, 6.4116, 6.3283,\n 6.4632, 6.5970, 6.5144, 6.4327, 6.5653, 6.6968, 6.8274, 6.9570,\n 7.0857, 7.2134, 7.3402, 7.4661, 7.5910, 7.5094, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.4373, 7.3592, 7.2818, 7.4039, 7.5251,\n 7.4483, 7.5687, 7.6883, 7.8072, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.2772, 8.3910, 8.5041, 8.6166,\n 8.7284, 8.6535, 8.7647, 8.8752, 8.8008, 8.7270, 8.6537, 8.7636,\n 8.6908, 8.8000, 8.9086, 9.0167, 8.9444, 8.8726, 8.9800, 9.0869,\n 9.0155, 9.1218, 9.0510, 9.1567, 9.2619, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.5286, 10.4603, 10.3923, 10.4893, 10.4217])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nOne more pseudo generalization and I'm giving up.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.4148, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.8040, 0.7509, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.2513, 0.2089, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.3411, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 10.9123, 10.8186,\n 10.7257, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.4661, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 13.8522, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.2282, 14.3166, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nOne more pseudo generalization or I'm giving up.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "15", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "40.0%", + "z-score": "1.34", + "p value": "0.0899", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more we study verbs, the crazier they get.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.9586, 9.0845, 8.9861, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.0698, 9.9783, 9.8877, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 11.8210, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.7847, 12.7082, 12.6323, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nDay by day the facts are getting murkier.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "18.9%", + "z-score": "-1.79", + "p value": "0.963", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.7566, -1.7990, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -1.9291, -1.7857])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "41.7%", + "z-score": "1.33", + "p value": "0.0912", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 0.5774,\n 1.0954, 0.8704, 1.3333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI'll fix you a drink.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "72", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "59.7%", + "z-score": "6.8", + "p value": "5.08e-12", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495, 2.1170,\n 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.2426, 4.0415, 3.8497, 4.1111,\n 4.3644, 4.6101, 4.8488, 5.0811, 5.3072, 5.5277, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.0576, 5.2697, 5.1121, 5.3199, 5.5234, 5.7229, 5.9186, 6.1107,\n 6.2993, 6.1477, 6.0000, 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997,\n 5.3716, 5.5549, 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738,\n 6.6395, 6.5166, 6.3960, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nFred watered the plants flat.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill coughed his way out of the restaurant.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "171", + "Fraction of T in Greenlist": "85.9%", + "z-score": "19.8", + "p value": "5.54e-88", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.4194, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.6373, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.5534, 9.7043, 9.8532, 10.0000,\n 10.1449, 9.9613, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.4926,\n 10.6296, 10.7650, 10.8989, 10.7331, 10.8666, 10.9985, 11.1291, 11.2583,\n 11.3862, 11.5128, 11.6382, 11.7624, 11.8853, 12.0071, 12.1278, 11.9753,\n 11.8254, 11.9466, 12.0667, 12.1857, 12.3037, 12.4207, 12.5367, 12.6517,\n 12.7659, 12.6240, 12.7379, 12.8508, 12.9628, 13.0740, 13.1844, 13.2939,\n 13.4026, 13.5105, 13.6176, 13.7240, 13.8296, 13.6963, 13.5648, 13.6707,\n 13.7759, 13.8804, 13.9842, 14.0873, 14.1898, 14.2915, 14.3927, 14.2667,\n 14.3676, 14.4679, 14.5676, 14.6667, 14.7651, 14.8630, 14.9603, 15.0570,\n 15.1532, 15.2488, 15.3439, 15.2240, 15.1054, 15.2007, 15.2955, 15.3898,\n 15.4835, 15.5767, 15.6694, 15.7617, 15.8534, 15.7389, 15.8306, 15.9217,\n 16.0124, 16.1026, 16.1923, 16.2816, 16.3705, 16.4589, 16.5469, 16.6345,\n 16.7216, 16.6118, 16.5030, 16.5903, 16.6772, 16.7638, 16.8499, 16.9356,\n 17.0209, 17.1059, 17.1905, 17.0848, 17.1693, 17.2534, 17.3371, 17.4205,\n 17.5035, 17.5862, 17.6685, 17.7504, 17.8320, 17.9133, 17.9942, 17.8923,\n 17.7911, 17.8722, 17.9530, 18.0334, 18.1135, 18.1933, 18.2728, 18.3519,\n 18.4308, 18.3322, 18.4110, 18.4895, 18.5676, 18.6455, 18.7231, 18.8004,\n 18.8774, 18.9541, 19.0306, 19.1067, 19.1826, 19.0870, 18.9921, 19.0681,\n 19.1439, 19.2194, 19.2946, 19.3695, 19.4442, 19.5186, 19.5928, 19.5000,\n 19.5741, 19.6479, 19.7215, 19.7949, 19.8680, 19.7767, 19.8497])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe're dancing the night away.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 2.2011, 2.0889, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.6833, 2.5873, 2.4930, 2.6914, 2.5981,\n 2.7928, 2.7005, 2.6098, 2.8006, 2.9887, 3.1743, 3.0833, 2.9938,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.0071, 2.9231, 3.0984,\n 3.0151, 3.1879, 3.1052, 3.0237, 3.1937, 3.1129, 3.2806, 3.2004,\n 3.1211, 3.2863, 3.4498, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.4586, 3.3826, 3.5396, 3.4641, 3.3895, 3.5443, 3.4702, 3.6233,\n 3.5496, 3.4768, 3.6279, 3.5556, 3.7051, 3.6332, 3.5620, 3.7097,\n 3.8562, 4.0015, 3.9302, 3.8596, 3.7897, 3.7205, 3.8636, 3.7947,\n 3.9365, 3.8680, 3.8002, 3.9404, 3.8730, 4.0119, 3.9448, 3.8784,\n 4.0158, 3.9497, 4.0859, 4.0202, 3.9549, 4.0898, 4.2237, 4.3566,\n 4.2914, 4.2267, 4.1625, 4.0988, 4.2301, 4.1667, 4.2970, 4.2339,\n 4.1713, 4.3004, 4.2381, 4.3661, 4.3042, 4.2426, 4.3695, 4.3083,\n 4.4342, 4.3733, 4.3128, 4.4376, 4.5617, 4.6850, 4.6245, 4.5644,\n 4.5047, 4.4454, 4.5674, 4.5083, 4.6295, 4.5707, 4.5123, 4.6325,\n 4.5744, 4.6938, 4.6359, 4.5783, 4.6968, 4.6395, 4.7572, 4.7001,\n 4.6434, 4.7602, 4.8763, 4.9918, 4.9351, 4.8787, 4.8227, 4.7670,\n 4.8815, 4.8260, 4.9397, 4.8845, 4.8295, 4.9425, 4.8877, 5.0000,\n 4.9455, 4.8913, 5.0027, 4.9487, 5.0595, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHerman hammered the metal flat.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "44.4%", + "z-score": "1.35", + "p value": "0.089", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330, 1.3472])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.4896, 9.3865, 9.2847,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 11.9024, 12.0032, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 12.9755, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.4744, 13.5647, 13.4871, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe critics laughed the play off the stage.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "11.0%", + "z-score": "-2.77", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.2259, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.3524, 12.4567, 12.5604, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.9561, 14.0503, 13.9585, 14.0524, 14.1458, 14.2388, 14.1481, 14.2408,\n 14.3330, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.7885,\n 14.8779, 14.7898, 14.8789, 14.9677, 15.0560, 14.9689, 15.0570, 15.1448,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.4062, 15.4922, 15.5778, 15.6631,\n 15.5783, 15.6633, 15.7481, 15.8325, 15.7485, 15.8327, 15.9165, 15.8333,\n 15.9169, 16.0002, 16.0832, 16.1658, 16.2481, 16.3301, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe pond froze solid.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "28.7%", + "z-score": "1.12", + "p value": "0.132", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547, 0.9802, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 0.6794, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.9333, 0.8165, 0.7035, 0.5941, 0.8783, 0.7698,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.9918, 0.8909, 0.7924, 1.0445, 0.9467,\n 0.8513, 1.0948, 1.0000, 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.0999,\n 1.0120, 0.9258, 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.5275,\n 1.4434, 1.3606, 1.2792, 1.1991, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328, 0.9623,\n 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309, 1.1628, 1.3389,\n 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.4142, 1.3483, 1.2831, 1.2185,\n 1.3856, 1.3213, 1.4863, 1.4222, 1.3587, 1.2959, 1.4580, 1.3954, 1.5556,\n 1.4931, 1.4313, 1.3700, 1.5275, 1.4664, 1.6222, 1.5613, 1.5010, 1.4412,\n 1.3819, 1.3231, 1.2649, 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316,\n 1.2808, 1.4289, 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435,\n 1.1882, 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 1.0879, 1.0371, 1.1746, 1.1239, 1.0735,\n 1.0235, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954, 1.2285, 1.1794,\n 1.3114, 1.2623, 1.2136, 1.1651, 1.1169])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill rolled out of the room.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "113", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "74.3%", + "z-score": "12.1", + "p value": "4.58e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.0178, 7.8360, 7.6594, 7.8320, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.7442, 10.8727, 11.0000, 10.8612, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.3389, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 11.9138, 11.7907, 11.9062, 12.0208,\n 11.9001, 12.0142, 12.1274, 12.0089, 12.1216, 12.0049, 12.1171, 12.0021,\n 11.8885, 12.0005, 11.8885, 11.7778, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe gardener watered the flowers flat.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.9608, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.3213, 1.2577, 1.4222, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.5519, 1.4963, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.7566, 1.7018, 1.6473, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.6127, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.9327, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.7119, 1.8383, 1.9640, 1.9149,\n 1.8660, 1.8175, 1.9419, 1.8935, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.8058, 1.9267, 2.0470, 2.0000,\n 1.9533, 1.9068, 2.0259, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe gardener watered the flowers.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142, 1.7321, 1.5852, 1.8889,\n 1.7457, 2.0370, 1.8974, 2.1776, 2.0412, 1.9096, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.1909, 2.4345, 2.3163, 2.5533, 2.4371, 2.3238,\n 2.5538, 2.7791, 2.6667, 2.5568, 2.4495, 2.6679, 2.5621, 2.7757, 2.6713,\n 2.8804, 2.7775, 2.6765, 2.8808, 3.0817, 2.9814, 3.1787, 3.0796, 3.2733,\n 3.1754, 3.3657, 3.2691, 3.4562, 3.3607, 3.2667, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.2686, 3.4427, 3.3566, 3.2717,\n 3.1879, 3.3587, 3.2757, 3.4442, 3.3619, 3.5282, 3.4466, 3.6107, 3.5298,\n 3.6919, 3.6116, 3.5322, 3.6920, 3.8503, 3.7712, 3.6931, 3.6159, 3.7717,\n 3.6950, 3.8490, 3.7730, 3.9253, 3.8497, 3.7750, 3.9254, 4.0745, 4.0000,\n 4.1475, 4.0736, 4.2196, 4.1461, 4.2907, 4.2178, 4.3609, 4.2885, 4.2167,\n 4.3583, 4.4987, 4.4272, 4.5663, 4.4953, 4.6332, 4.5626, 4.6992, 4.6291,\n 4.7645, 4.6949, 4.6258, 4.7599, 4.8930, 4.8242, 4.7559, 4.6883, 4.8200,\n 4.7527, 4.8833, 4.8164, 4.9460, 4.8795, 4.8135, 4.7481, 4.8763, 4.8113,\n 4.9385, 4.8737, 5.0000, 4.9356, 5.0609, 4.9969, 5.1213, 5.0576, 4.9943,\n 5.1177, 5.2402, 5.1772, 5.2989, 5.2362, 5.3571, 5.2947, 5.4147, 5.3526,\n 5.4718, 5.4100, 5.3487, 5.2877, 5.4059, 5.3452, 5.2850, 5.2251, 5.3423,\n 5.2827, 5.3991, 5.3398, 5.4554, 5.3964, 5.3377, 5.2795, 5.3941, 5.3361,\n 5.4501, 5.3923, 5.5056, 5.4480, 5.5606, 5.5033, 5.6153, 5.5582, 5.5015,\n 5.6126, 5.7233, 5.6667, 5.6104, 5.7203, 5.8296, 5.7735, 5.8822, 5.8263,\n 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill broke the bathtub into pieces.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill broke the bathtub.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "19.7%", + "z-score": "-1.5", + "p value": "0.933", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey drank the pub dry.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.6630, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.7592, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.3594, 6.1815, 6.0093, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 6.7568, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.4370, 7.3147, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 7.8444, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 8.9138, 8.8228, 8.7327, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.2768, 9.1927, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.7908, 9.7109, 9.6317, 9.5532,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.2790, 10.2029, 10.1273, 10.0523, 9.9778,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.0521, 10.9829, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey drank the pub.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547, 0.9802, 1.3608,\n 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 1.9245,\n 1.8034, 2.0605, 2.3113, 2.1909, 2.4345, 2.6726, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.3333, 2.5568, 2.4495, 2.6679, 2.8823, 2.7757, 2.6713,\n 2.5690, 2.4689, 2.3706, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823,\n 2.8868, 2.7928, 2.7005, 2.6098, 2.8006, 2.9887, 2.8983, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 2.7456, 2.6632, 2.5820, 2.5019,\n 2.4228, 2.3448, 2.5198, 2.4423, 2.3658, 2.2902, 2.4618, 2.6316, 2.7995,\n 2.7235, 2.6485, 2.8138, 2.9775, 2.9025, 2.8284, 2.9897, 2.9161, 2.8433,\n 3.0022, 2.9299, 3.0870, 3.0151, 2.9439, 3.0989, 3.0282, 2.9582, 2.8889,\n 3.0415, 2.9726, 2.9044, 3.0551, 2.9872, 3.1363, 3.2841, 3.4308, 3.3627,\n 3.2953, 3.2285, 3.1623, 3.3066, 3.4499, 3.3838, 3.5256, 3.4599, 3.3947,\n 3.5350, 3.6742, 3.6091, 3.5446, 3.4806, 3.4171, 3.3542, 3.2918, 3.2299,\n 3.1685, 3.3049, 3.2437, 3.3789, 3.3181, 3.2577, 3.1977, 3.3314, 3.2717,\n 3.2124, 3.3447, 3.2857, 3.2271, 3.1690, 3.1113, 3.0540, 2.9971, 3.1273,\n 3.0706, 3.0143, 3.1433, 3.0872, 3.0315, 2.9761, 2.9212, 2.8666, 2.8124,\n 2.9394, 2.8853, 2.8316, 2.9575, 2.9040, 2.8508, 2.7979, 2.7454, 2.6932,\n 2.6414, 2.7654, 2.7137, 2.6623, 2.7852, 2.9076, 3.0292, 2.9776, 2.9263,\n 2.8752, 2.9957, 2.9448, 2.8943, 3.0138, 2.9633, 2.9132, 2.8633, 2.8137,\n 2.7644, 2.7154, 2.8333, 2.7844, 2.7358, 2.8528, 2.8043, 2.7560, 2.7080,\n 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.3030,\n 7.4839, 7.3051, 7.1317, 7.3113, 7.4878, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.0924, 9.2376,\n 9.0947, 8.9544, 9.0990, 9.2418, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 11.7762, 11.8885, 12.0000, 11.8895, 11.7803, 11.6723, 11.7838,\n 11.6772, 11.7881, 11.8982, 12.0077, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.4405, 12.5460, 12.4434, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.3710, 13.4691, 13.5668, 13.6640,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 13.8593, 13.9543, 14.0488,\n 14.1428, 14.2364, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.4234,\n 14.5150, 14.6062, 14.6970, 14.7874, 14.6976, 14.7877, 14.8773, 14.9666,\n 15.0555, 14.9669, 15.0555, 15.1438, 15.0560, 14.9689, 14.8825, 14.9707,\n 14.8849, 14.9729, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.5792, 15.4956, 15.5805, 15.4976, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe professor talked us into a stupor.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.5323, 1.4317, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.0973, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.0069, -0.8601, -0.7143, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.8420, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 8.9544, 9.0990, 8.9618, 8.8271, 8.9709, 9.1130, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.4846, 10.3709,\n 10.4932, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 10.8790, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.3555, 11.2522, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.7222, 11.6242, 11.7320, 11.8392,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 12.8877, 12.9874, 12.8942,\n 12.9935, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.6876, 13.7818, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 13.9896, 14.0813, 13.9959, 13.9111, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.4591, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.7348, 14.6534, 14.7406, 14.6599, 14.7468, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe professor talked us.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 8.0413, 7.9196, 7.8000, 7.6823, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.6678, 8.5612, 8.4560, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.2763, 10.1840, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 10.9497, 10.8631, 10.7772,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.5655, 11.6666, 11.5841, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.1076, 12.0302, 11.9534, 12.0493, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.6323, 12.7248, 12.8169, 12.7416, 12.8333,\n 12.7585, 12.6841, 12.6102, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe yelled ourselves hoarse.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 7.6667,\n 7.8355, 8.0017, 7.8420, 8.0064, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 8.9815,\n 9.1225, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.5876, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.4304, 10.5490, 10.4444, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.4935, 11.5966, 11.5111, 11.4263,\n 11.3423, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.4065, 12.5024, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 12.9540, 13.0460, 12.9691, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe yelled ourselves.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.7473, 9.8632, 9.7725, 9.8877, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.0180, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.8464, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe yelled Harry hoarse.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "111", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "67.6%", + "z-score": "10.4", + "p value": "1.94e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.0000,\n 7.8355, 7.6751, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.1999, 10.3287, 10.2030, 10.0791, 9.9570, 9.8367, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.1124, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.2587, 10.3812, 10.2706, 10.1614, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.4592, 10.3571])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHarry coughed himself into a fit.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "11.1%", + "z-score": "-0.962", + "p value": "0.832", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHarry coughed himself.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.2722, 0.4070, 0.3607, 0.3146, 0.4481, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "166", + "Fraction of T in Greenlist": "83.4%", + "z-score": "19", + "p value": "4.71e-81", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.8926, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.4850, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 10.0426, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.5709, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 10.9222, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.4097, 11.5333, 11.6559, 11.7773, 11.8977, 12.0170, 11.8771,\n 11.9961, 12.1140, 12.2309, 12.3468, 12.4619, 12.3269, 12.4416, 12.5553,\n 12.6682, 12.7802, 12.8913, 12.7609, 12.8717, 12.9817, 13.0909, 13.1993,\n 13.3070, 13.1806, 13.2879, 13.3945, 13.5004, 13.6056, 13.7100, 13.5873,\n 13.6915, 13.7950, 13.8978, 14.0000, 14.1015, 13.9822, 14.0835, 14.1842,\n 14.2842, 14.3836, 14.4825, 14.3663, 14.4649, 14.5629, 14.6604, 14.7573,\n 14.8536, 14.7404, 14.8365, 14.9321, 15.0272, 15.1217, 15.2158, 15.1052,\n 15.1990, 15.2924, 15.3852, 15.4776, 15.5695, 15.4614, 15.5531, 15.6443,\n 15.7351, 15.8254, 15.9153, 15.8096, 15.8993, 15.9886, 16.0774, 16.1658,\n 16.2538, 16.1503, 16.2381, 16.3255, 16.4125, 16.4992, 16.5854, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.9105, 16.8109, 16.8953, 16.9794,\n 17.0631, 17.1464, 17.2294, 17.1317, 17.2146, 17.2971, 17.3792, 17.4611,\n 17.5426, 17.4466, 17.5280, 17.6090, 17.6897, 17.7701, 17.8502, 17.7559,\n 17.8359, 17.9155, 17.9949, 18.0739, 18.1527, 18.0599, 18.1386, 18.2169,\n 18.2949, 18.3727, 18.4502, 18.3589, 18.4363, 18.5133, 18.5901, 18.6667,\n 18.7429, 18.6531, 18.7292, 18.8051, 18.8807, 18.9561, 19.0312])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHarry coughed us into a fit.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "33.3%", + "z-score": "0.577", + "p value": "0.282", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.0947, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.3927, 9.5304, 9.4000, 9.5366, 9.4087, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.6156, 9.5021, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.2259, 11.1197, 11.0147, 11.1291,\n 11.0254, 11.1392, 11.0368, 11.1500, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.4065, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.1324, 11.0389, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.9187, 12.0218, 11.9319,\n 11.8427, 11.7543, 11.6667, 11.5797, 11.4935, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 11.9341, 12.0341, 12.1335, 12.0499,\n 11.9669, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.0712, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.0127, 11.9380, 12.0327, 11.9586, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.0419, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill followed the road into the forest.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.0282, -2.0656, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.2384, -2.2740, -2.3094, -2.3447, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "180", + "Fraction of T in Greenlist": "90.5%", + "z-score": "21.3", + "p value": "3.46e-101", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 8.2923, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.7438, 8.9113, 9.0759, 9.2376, 9.3967, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.7996, 9.9491, 10.0965, 10.2419, 10.3853, 10.5269, 10.6667,\n 10.8047, 10.9411, 10.7524, 10.8887, 11.0234, 11.1566, 11.2882, 11.4184,\n 11.5473, 11.6747, 11.8008, 11.6276, 11.7536, 11.8784, 12.0020, 12.1244,\n 12.2456, 12.3656, 12.4846, 12.6025, 12.7194, 12.8352, 12.9501, 13.0639,\n 12.9066, 13.0204, 13.1333, 13.2453, 13.3564, 13.4666, 13.5760, 13.6845,\n 13.7923, 13.6441, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.3818, 14.4842, 14.5860, 14.6871, 14.7875, 14.8873, 14.7495, 14.8492,\n 14.9484, 15.0469, 15.1448, 15.2420, 15.3387, 15.4349, 15.5304, 15.3990,\n 15.4945, 15.5895, 15.6839, 15.7778, 15.8711, 15.9640, 16.0563, 16.1481,\n 16.2395, 16.3303, 16.4206, 16.5105, 16.3864, 16.4763, 16.5657, 16.6547,\n 16.7432, 16.8312, 16.9188, 17.0060, 17.0928, 16.9734, 17.0601, 17.1464,\n 17.2323, 17.3178, 17.4029, 17.4877, 17.5720, 17.6559, 17.7395, 17.8227,\n 17.9055, 17.9879, 17.8741, 17.9566, 18.0386, 18.1204, 18.2017, 18.2828,\n 18.3634, 18.4438, 18.5238, 18.4137, 18.4937, 18.5733, 18.6527, 18.7317,\n 18.8104, 18.8888, 18.9669, 19.0447, 19.1222, 19.1994, 19.2763, 19.3529,\n 19.2472, 19.3238, 19.4001, 19.4761, 19.5518, 19.6272, 19.7024, 19.7773,\n 19.8520, 19.7492, 19.8238, 19.8982, 19.9723, 20.0461, 20.1197, 20.1930,\n 20.2661, 20.3389, 20.4115, 20.4838, 20.5559, 20.6277, 20.5286, 20.6004,\n 20.6720, 20.7434, 20.8145, 20.8854, 20.9560, 21.0265, 21.0967, 21.0000,\n 21.0702, 21.1402, 21.2099, 21.2795, 21.3488, 21.2538, 21.3231])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe drove Highway 5 from SD to SF.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nFred tracked the leak to its source.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.8793, 0.8165, 0.9864, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 1.1111, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.7789, 0.9062, 1.0328, 0.9870, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.3474, 1.4699, 1.5916, 1.7128, 1.8333,\n 1.7870, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.2222, 7.4194, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.6373, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.5534, 9.7043, 9.8532, 10.0000,\n 9.8150, 9.9613, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.4926,\n 10.6296, 10.7650, 10.8989, 10.7331, 10.8666, 10.9985, 11.1291, 11.2583,\n 11.3862, 11.5128, 11.6382, 11.7624, 11.8853, 12.0071, 11.8538, 11.9753,\n 11.8254, 11.9466, 12.0667, 12.1857, 12.3037, 12.4207, 12.5367, 12.6517,\n 12.7659, 12.6240, 12.7379, 12.8508, 12.9628, 13.0740, 13.1844, 13.2939,\n 13.4026, 13.5105, 13.6176, 13.7240, 13.5901, 13.6963, 13.5648, 13.6707,\n 13.7759, 13.8804, 13.9842, 14.0873, 14.1898, 14.2915, 14.3927, 14.2667,\n 14.3676, 14.4679, 14.5676, 14.6667, 14.7651, 14.8630, 14.9603, 15.0570,\n 14.9359, 14.8162, 14.9132, 15.0096, 15.1054, 14.9881, 14.8721, 14.9681,\n 15.0636, 14.9495, 14.8365, 14.9321, 15.0272, 14.9160, 15.0108, 15.1052,\n 14.9957, 15.0898, 15.1834, 15.2766, 15.3692, 15.2619, 15.1556, 15.2483,\n 15.1432, 15.0391, 14.9359, 14.8337, 14.9269, 15.0195, 15.1118, 15.2036,\n 15.1031, 15.0035, 15.0952, 14.9967, 14.8990, 14.8021, 14.8940, 14.9854,\n 15.0763, 15.1669, 15.2570, 15.3467, 15.4360, 15.3411, 15.4302, 15.3362,\n 15.4250, 15.5134, 15.6014, 15.6891, 15.7763, 15.8631, 15.9496, 16.0357,\n 15.9437, 16.0296, 16.1151, 16.2003, 16.2851, 16.3695, 16.4536, 16.5374,\n 16.6208, 16.7039, 16.7866, 16.6969, 16.7794, 16.6905, 16.7728, 16.8549,\n 16.9366, 17.0180, 17.0991, 17.1799, 17.2604, 17.3406, 17.2533, 17.3333,\n 17.4130, 17.4925, 17.5716, 17.6504, 17.7290, 17.8072, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn danced waltzes across the room.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.0359, -1.0812, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.9119, -0.9509, -0.9897, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill urinated out the window.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "124", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "49.2%", + "z-score": "6.22", + "p value": "2.46e-10", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.4901, 4.6448, 4.5611, 4.4783,\n 4.6311, 4.7823, 4.7001, 4.8497, 4.9980, 5.1450, 5.2906, 5.2085,\n 5.3526, 5.2713, 5.4140, 5.3333, 5.4747, 5.6149, 5.5348, 5.4554,\n 5.5942, 5.5155, 5.6530, 5.7894, 5.9247, 6.0590, 6.1923, 6.1137,\n 6.2459, 6.1680, 6.0908, 6.2217])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill coughed out the window.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.1551, 8.9815, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 10.8727, 11.0000, 11.1261, 11.2510, 11.3747, 11.4974, 11.3608,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.8289, 11.6988, 11.8168,\n 11.6890, 11.8065, 11.9230, 12.0386, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.3595, 12.2398, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.7900, 12.6735, 12.5583, 12.4444, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.8653, 12.9704, 12.8622, 12.7551, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.1681, 13.2701, 13.1665, 13.2681,\n 13.3690, 13.4694, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.7621, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 14.1462, 14.0489,\n 13.9524, 13.8567, 13.9524, 14.0475, 14.1422, 14.0479, 14.1422, 14.2361,\n 14.3295, 14.4225, 14.5150, 14.4222, 14.3301, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.7877, 14.6970, 14.7874, 14.6976, 14.7877, 14.8773, 14.7885,\n 14.7002, 14.6126, 14.7023, 14.6155, 14.7049, 14.6188, 14.7079, 14.6225,\n 14.5378, 14.6267, 14.5426, 14.6313, 14.5479, 14.6362, 14.7242, 14.6416,\n 14.7293, 14.6473, 14.7348, 14.8219, 14.9086, 14.9950, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill bled on the floor.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.0060, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.5346, 9.4513, 9.5638, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.8995, 9.8197, 9.9278,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.8186, 10.9178, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe toilet leaked through the floor into the kitchen below.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 1.3472,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 0.6794, 1.0000,\n 1.3093, 1.1793, 1.0541, 0.9333, 0.8165, 0.7035, 0.9901, 0.8783, 1.1547,\n 1.0441, 1.3112, 1.5717, 1.4606, 1.3525, 1.2472, 1.4968, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.0000, 0.9073, 1.1431, 1.3744, 1.2810, 1.1896, 1.0999,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 1.2366,\n 1.1547, 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328, 0.9623,\n 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 1.0523, 0.9847, 0.9180, 0.8520,\n 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071, 0.8793, 1.0498, 0.9864,\n 1.1547, 1.3213, 1.2577, 1.1946, 1.1323, 1.2959, 1.2337, 1.1721, 1.1111,\n 1.0507, 0.9909, 0.9316, 1.0911, 1.2492, 1.1896, 1.1306, 1.0721, 1.2276,\n 1.1693, 1.1114, 1.0541, 0.9972, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316,\n 1.0759, 1.0206, 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447,\n 0.9901, 1.1345, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 1.0879, 1.0371, 0.9867, 0.9366, 1.0735,\n 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303, 0.6825, 0.8165,\n 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909, 0.8438, 0.7971, 0.7506,\n 0.7044, 0.6584, 0.7878, 0.9165, 0.8704, 0.8245, 0.7789, 0.9062, 0.8607,\n 0.8154, 0.7703, 0.7255, 0.6810, 0.6367, 0.7620, 0.8866, 0.8422, 0.7979,\n 0.7539, 0.8773, 0.8333, 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385,\n 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 5.7155, 5.9604, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.3557, 8.5206, 8.6828, 8.8426, 9.0000,\n 8.8252, 8.9815, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 10.1187, 9.9653, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.4834, 10.6145,\n 10.7442, 10.8727, 10.7333, 10.8612, 10.7246, 10.8518, 10.9777, 10.8444,\n 10.9697, 11.0938, 10.9634, 11.0870, 11.2094, 11.3308, 11.4512, 11.3244,\n 11.4442, 11.5630, 11.4388, 11.5570, 11.6743, 11.7907, 11.9062, 11.7851,\n 11.6656, 11.7809, 11.8953, 11.7779, 11.6620, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.2248, 12.1118, 12.2222, 12.1107, 12.2207, 12.1107, 12.0020,\n 12.1117, 12.0044, 12.1136, 12.2221, 12.3299, 12.4370, 12.5434, 12.4383,\n 12.5442, 12.6495, 12.5460, 12.6508, 12.7550, 12.8586, 12.9616, 12.8598,\n 12.7590, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.4664, 13.3689, 13.2722, 13.3710, 13.4691, 13.5668, 13.6640,\n 13.7606, 13.8567, 13.9524, 14.0475, 14.1422, 14.0479, 14.1422, 14.2361,\n 14.3295, 14.4225, 14.5150, 14.6071, 14.6987, 14.6062, 14.6976, 14.6059,\n 14.6970, 14.7877, 14.8779, 14.9677, 15.0571, 15.1461, 15.0560, 14.9666,\n 15.0555, 15.1440, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.6653, 15.7509, 15.8362, 15.7495, 15.8345, 15.9193, 15.8334,\n 15.9179, 16.0020, 16.0858, 16.1693, 16.0845, 16.0002, 16.0836, 16.0000,\n 16.0832, 16.0002, 15.9178, 16.0009, 16.0836, 16.0019, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill ate off the floor.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.4748, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414, 12.5592, 12.4019, 12.2474,\n 12.0957, 12.2150, 12.0667, 11.9208, 12.0405, 11.8977, 12.0170, 11.8771,\n 11.9961, 12.1140, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.6682, 12.5368, 12.4072, 12.2794, 12.1533, 12.2671, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.9196, 13.0274, 12.9080,\n 12.7900, 12.6735, 12.5583, 12.4444, 12.5531, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.1785, 13.0707,\n 12.9641, 12.8586, 12.7542, 12.8582, 12.7550, 12.8586, 12.9616, 13.0639,\n 13.1657, 13.2669, 13.3675, 13.4675, 13.5670, 13.4664, 13.5655, 13.4660,\n 13.3674, 13.4664, 13.3689, 13.4674, 13.5654, 13.6629, 13.7599, 13.8564,\n 13.9524, 14.0479, 14.1429, 14.2374, 14.1422, 14.0479, 13.9543, 14.0488,\n 13.9561, 13.8642, 13.9585, 13.8675, 13.9615, 14.0550, 13.9650, 14.0582,\n 13.9690, 13.8804, 13.7926, 13.8857, 13.7986, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.8074, 14.7242, 14.6416,\n 14.7293, 14.6473, 14.5659, 14.6534, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.1761, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill drank from the hose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.7629, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.1243, 10.0389, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.4407, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.7480, 10.6683, 10.7719,\n 10.8749, 10.9773, 10.8984, 11.0004, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.4244, 11.3468, 11.4459, 11.3688, 11.2924, 11.3910, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.6297, 11.5549, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.9586, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.9273, 11.8551, 11.7833, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis metal hammers flat easily.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "78.3%", + "z-score": "10.2", + "p value": "8.3e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey made him president.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey made him angry.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 6.9378,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.4087, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.0851, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.5556, 11.6683, 11.5601, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 11.7992, 11.6966, 11.5950,\n 11.7045, 11.6041, 11.5048, 11.6139, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.2503, 12.1622, 12.2628, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.7918, 12.7073, 12.8037, 12.7199, 12.8160, 12.7329, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.2737, 14.3607, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey caused him to become angry by making him.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "124", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "66.9%", + "z-score": "10.8", + "p value": "2.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.4169,\n 10.3109, 10.2061, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.7843])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey caused him to become president by making him.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "25.8%", + "z-score": "0.176", + "p value": "0.43", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.9661, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.1640, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.2828, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.0102, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 12.9391,\n 12.8616, 12.7847, 12.8771, 12.9691, 12.8928, 12.8169, 12.7416, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey made him to exhaustion.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "47.7%", + "z-score": "7.41", + "p value": "6.42e-14", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.0453, 8.9612, 8.8778, 8.7952, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.7831, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.5824, 8.6963, 8.6190, 8.5424, 8.6556, 8.5796, 8.5041, 8.4293,\n 8.3550, 8.2813, 8.2082, 8.1356, 8.0636, 7.9921, 7.9211, 7.8507,\n 7.9628, 7.8928, 7.8233, 7.7544, 7.6859, 7.7971, 7.9078, 7.8397,\n 7.9497, 7.8820, 7.8147, 7.7480, 7.8572, 7.9659, 7.8995, 7.8335,\n 7.9415, 8.0490, 7.9833, 7.9181, 7.8533, 7.7889, 7.7249, 7.8316,\n 7.7679, 7.7047, 7.6418, 7.7478, 7.6853, 7.6231, 7.7285, 7.6667,\n 7.6052, 7.5441, 7.4834, 7.4231, 7.5276, 7.4676, 7.4078])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "56.2%", + "z-score": "9.13", + "p value": "3.47e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962, 5.4611, 5.1711,\n 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569, 5.8890, 6.1143, 6.3333,\n 6.5465, 6.3255, 6.1137, 6.3254, 6.1237, 5.9297, 5.7429, 5.9530, 5.7735,\n 5.9797, 5.8068, 6.0093, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.2993, 6.1477, 6.0000, 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568,\n 6.9286, 7.0980, 6.9589, 6.8229, 6.9903, 6.8573, 6.7269, 6.5991, 6.7648,\n 6.6395, 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.1143, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296, 7.3773,\n 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.7373, 7.6317, 7.5275, 7.4247,\n 7.5653, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782, 7.9138, 8.0483, 8.1816,\n 8.0829, 7.9853, 7.8889, 7.7937, 7.6995, 7.6064, 7.7387, 7.8699, 8.0000,\n 8.1291, 8.2572, 8.3843, 8.5105, 8.4184, 8.3274, 8.4526, 8.3625, 8.2733,\n 8.1850, 8.3093, 8.2219, 8.3453, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439,\n 8.8631, 8.7773, 8.6924, 8.6083, 8.5249, 8.4423, 8.3605, 8.4788, 8.5964,\n 8.7133, 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.0117, 9.1250, 9.0452,\n 8.9660, 8.8874, 9.0000, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785, 9.2885,\n 9.3979, 9.5066, 9.4299, 9.3537, 9.2782, 9.2032, 9.1287])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey made him into a monster.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.1273, 9.2480, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.0453, 8.9612, 8.8778, 8.7952, 8.7133,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.7033, 8.6241, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.3526, 8.2772, 8.2024, 8.1282, 8.2420,\n 8.1683, 8.0952, 8.0227, 7.9507, 7.8793, 7.8084, 7.9211, 7.8507,\n 7.7808, 7.7114, 7.8233, 7.9347, 7.8657, 7.7971, 7.7291, 7.6615,\n 7.5944, 7.5277, 7.4615, 7.3958, 7.3305, 7.2656, 7.2012, 7.1372,\n 7.0736, 7.0104, 6.9477, 6.8853, 6.8233, 6.7618, 6.7006, 6.8101,\n 6.7492, 6.6887, 6.6285, 6.7372, 6.6774, 6.6179, 6.5588, 6.5000,\n 6.4416, 6.3835, 6.3258, 6.2684, 6.2113, 6.1546, 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.6410,\n 8.7943, 8.9456, 8.7943, 8.9443, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.0947, 8.9544, 9.0990, 8.9618, 9.1051, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.3310, 10.4579, 10.3347, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.0780,\n 11.1966, 11.3143, 11.1990, 11.3161, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.4378, 11.3276, 11.4420, 11.3333, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.5460, 12.4434, 12.5485, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.0699,\n 13.1701, 13.2698, 13.1730, 13.2722, 13.1765, 13.0815, 13.1806, 13.2791,\n 13.1852, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.0660, 14.1582, 14.0707, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.7113, 14.7998, 14.7152, 14.8034, 14.7195, 14.6362, 14.7242, 14.8119,\n 14.7293, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.7468, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe trolley rumbled through the tunnel.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.5322, -2.3564, -2.4004,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.4887, -2.3238, -2.3660, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.3016, -2.3422, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.4351,\n -2.2871, -2.3262, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.4198, -2.2785, -2.3163,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.4099, -2.2744, -2.3110, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe wagon rumbled down the road.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "35.6%", + "z-score": "2.09", + "p value": "0.0181", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.5323, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.7002, 1.9064, 2.1094, 2.0207,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 2.0702, 2.2611, 2.1773,\n 2.0948])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.0632, 8.2035, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 12.8267, 12.7376,\n 12.8359, 12.9337, 12.8456, 12.9430, 12.8556, 12.9527, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.6514, 13.7434, 13.8350, 13.9262,\n 14.0170, 13.9343, 13.8522, 13.9427, 13.8613, 13.7803, 13.8707, 13.7904,\n 13.7106, 13.8007, 13.7215, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.7559, 13.6789, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe bullets whistled past the house.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.9152, 0.8577, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.1711, 1.3128, 1.2597, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.7158, 1.6641, 1.6127, 1.7454, 1.6941, 1.8257,\n 1.9566, 1.9052, 2.0350, 1.9837, 1.9327, 1.8821, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.6737, 1.6262, 1.5791, 1.7025,\n 1.6555, 1.7780, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.1118, 5.9954, 6.1612, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.7327, 8.6436,\n 8.7652, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.7109, 9.8197, 9.9278,\n 10.0353, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.6650, 10.7671, 10.6904,\n 10.6144, 10.5388, 10.6404, 10.7415, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.5725, 10.6722, 10.5998, 10.5278, 10.6271, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe knee replacement candidate groaned up the stairs.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "30.6%", + "z-score": "1.64", + "p value": "0.0502", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.4003, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 1.0289, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.3856, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.7143, 1.6514, 1.5892, 1.7457,\n 1.9009, 2.0548, 1.9920, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.7179, 1.8665, 1.8074, 1.7488, 1.8956, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.5187, 1.6554, 1.6028, 1.5505, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.6432])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe car honked down the road.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.7154, -1.7614, -1.5945, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.6125, -1.6496, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.0947, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.6630, 9.5304, 9.4000, 9.5366, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.2283, 10.1124, 9.9980, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 10.7835, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.0746, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.5156, 11.4261, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.6643, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.0956, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.2288, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe dog barked out of the room.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.6353, 1.5323, 1.7685, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.5986, 1.7942, 1.7130, 1.9052,\n 2.0948, 2.2819, 2.2000, 2.1193, 2.0397, 2.2226, 2.1436, 2.0656,\n 1.9887, 2.1678, 2.0913, 2.0158, 1.9413, 1.8677, 2.0426, 2.2156,\n 2.1420, 2.3126, 2.2393, 2.4077, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.0517, 2.2162, 2.1470, 2.3094, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.3120, 2.4660, 2.4004,\n 2.5527, 2.7037, 2.8534, 2.7875, 2.7222, 2.6575, 2.8051, 2.7406,\n 2.8868, 2.8226, 2.9673, 2.9035, 2.8402, 2.7775, 2.9202, 3.0619,\n 2.9991, 3.1395, 3.0770, 3.2161, 3.1539, 3.2918, 3.4286, 3.3665,\n 3.3049, 3.2437, 3.1831, 3.1229, 3.0632, 3.0039, 3.1382, 3.2717,\n 3.4042, 3.3447, 3.4762, 3.6068, 3.7366, 3.6770, 3.6178, 3.5590,\n 3.6874, 3.6289, 3.7563, 3.6980, 3.8244, 3.7664, 3.8919, 3.8341,\n 3.9586, 3.9010, 4.0247, 4.1477, 4.0901, 4.2122, 4.1549, 4.2762,\n 4.2191, 4.3395, 4.2827, 4.4023, 4.3456, 4.2893, 4.4080, 4.5260,\n 4.4698, 4.5871, 4.5311, 4.4754, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.4331, 4.5476, 4.4936, 4.6074, 4.7206, 4.8333,\n 4.7792, 4.7255, 4.6720, 4.7838, 4.7305, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.2222, 6.9830, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.2281, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.5206, 8.6828, 8.8426, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 10.0426, 10.1840,\n 10.3237, 10.4618, 10.5982, 10.7331, 10.8666, 10.7052, 10.5472, 10.6810,\n 10.5269, 10.3758, 10.5096, 10.3621, 10.4952, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.8771,\n 11.9961, 12.1140, 12.2309, 12.3468, 12.2114, 12.0779, 12.1940, 12.0630,\n 12.1786, 12.0499, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.4922,\n 12.6035, 12.7140, 12.8237, 12.9326, 13.0408, 13.1482, 13.2549, 13.3609,\n 13.4661, 13.5707, 13.6746, 13.5556, 13.4379, 13.5419, 13.4259, 13.3113,\n 13.4152, 13.3022, 13.4057, 13.5086, 13.6109, 13.4999, 13.6019, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.2730, 13.3747, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.6720, 13.7710, 13.8695, 13.9675, 14.0649, 14.1618, 14.2581,\n 14.3540, 14.4493, 14.5442, 14.6385, 14.7324, 14.8257, 14.7255, 14.6262,\n 14.7195, 14.6212, 14.7143, 14.6170, 14.5206, 14.6135, 14.7060, 14.7981,\n 14.8896, 14.9808, 15.0715, 15.1618, 15.2517, 15.3411, 15.4302, 15.5188,\n 15.6070, 15.6949, 15.7823, 15.8694, 15.9561, 15.8631, 15.7709, 15.8575,\n 15.7661, 15.8525, 15.7619, 15.6720, 15.7584, 15.8443, 15.9299, 16.0151,\n 16.1000, 16.1846, 16.2688, 16.3526, 16.4361, 16.5193, 16.6021, 16.6846,\n 16.7668, 16.8487, 16.9302, 17.0115, 16.9244, 16.8379, 16.9191, 16.8333,\n 16.9143, 16.8292, 16.7447, 16.8256, 16.9063, 16.9866, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe dog barked its way out of the room.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 1.2702, 1.1323, 1.4444,\n 1.3093, 1.1793, 1.0541, 1.3480, 1.6330, 1.5076, 1.7823, 1.6590, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.0954, 0.9918, 1.2472, 1.4968, 1.3926, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.4142,\n 1.3234, 1.5430, 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 1.2366,\n 1.4434, 1.3606, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.8074, 1.7321,\n 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771, 1.4076, 1.5823,\n 1.7552, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499, 1.5828, 1.5164, 1.4506,\n 1.6166, 1.7809, 1.7150, 1.6498, 1.5852, 1.7467, 1.9066, 1.8419, 2.0000,\n 1.9355, 1.8716, 1.8084, 1.7457, 1.6837, 1.6222, 1.5613, 1.7154, 1.8682,\n 1.8071, 1.9582, 1.8974, 1.8370, 1.7772, 1.7179, 1.6591, 1.6008, 1.7488,\n 1.8956, 1.8371, 1.9825, 1.9242, 2.0682, 2.2111, 2.1527, 2.2943, 2.2361,\n 2.1783, 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 2.0349, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732, 2.0071,\n 2.1401, 2.0868, 2.2188, 2.1656, 2.1128, 2.0604, 2.0083, 1.9566, 1.9052,\n 2.0350, 2.1640, 2.1125, 2.0613, 2.0105, 2.1381, 2.2650, 2.2140, 2.3400,\n 2.2892, 2.2387, 2.1884, 2.1385, 2.0889, 2.0396, 1.9906, 2.1145, 2.2377,\n 2.1886, 2.3110, 2.2620, 2.2133, 2.1648, 2.1167, 2.0688, 2.0212, 1.9738,\n 2.0943, 2.2141, 2.1667, 2.2857, 2.2384, 2.1913, 2.1444, 2.0979, 2.0515,\n 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "157", + "Fraction of T in Greenlist": "78.9%", + "z-score": "17.6", + "p value": "2.59e-69", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.2222, 7.4194, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.0829, 8.2577, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.8926, 9.0520, 8.8648, 9.0233, 8.8426, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.5852, 9.7312, 9.5668,\n 9.7119, 9.8553, 9.9969, 10.1368, 10.2752, 10.4119, 10.2562, 10.3923,\n 10.2404, 10.3758, 10.5096, 10.6421, 10.7732, 10.9030, 11.0315, 10.8866,\n 11.0145, 10.8727, 11.0000, 11.1261, 11.2510, 11.3747, 11.4974, 11.6189,\n 11.4829, 11.6039, 11.4704, 11.5909, 11.7104, 11.8289, 11.9464, 12.0630,\n 12.1786, 12.0499, 12.1651, 12.0386, 12.1533, 12.2671, 12.3801, 12.4922,\n 12.6035, 12.7140, 12.5916, 12.7017, 12.5812, 12.6909, 12.7998, 12.9080,\n 13.0154, 13.1221, 13.2280, 13.1111, 13.2167, 13.1015, 13.2067, 13.3113,\n 13.4152, 13.5185, 13.6211, 13.7230, 13.6109, 13.7125, 13.6019, 13.7032,\n 13.8039, 13.9040, 14.0036, 14.1025, 14.2009, 14.0930, 14.1911, 14.0846,\n 14.1824, 14.2796, 14.3763, 14.4725, 14.5682, 14.6634, 14.5593, 14.6542,\n 14.5513, 14.6459, 14.7400, 14.8337, 14.9269, 15.0195, 15.1118, 15.0111,\n 15.1031, 15.0035, 15.0952, 15.1865, 15.2774, 15.3678, 15.4578, 15.5473,\n 15.4498, 15.5391, 15.4425, 15.5316, 15.6203, 15.7086, 15.7965, 15.8840,\n 15.9711, 15.8763, 15.9632, 15.8694, 15.9561, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.2917, 16.3764, 16.2851, 16.3695, 16.4536, 16.5374,\n 16.6208, 16.7039, 16.7866, 16.6969, 16.7794, 16.6905, 16.7728, 16.8549,\n 16.9366, 17.0180, 17.0991, 17.1799, 17.0924, 17.1730, 17.0862, 17.1667,\n 17.2468, 17.3267, 17.4062, 17.4855, 17.5644, 17.4790, 17.5578])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill whistled his way past the house.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe witch vanished into the forest.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.8489, 1.7408, 1.6353, 1.5323, 1.7685, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.0647, 1.9711, 2.1798, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.1320, 2.3276, 2.5205, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.5019, 2.4228, 2.3448, 2.2678, 2.1918, 2.1167, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.8773, 2.0381,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.9355, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 1.9920, 1.9298, 2.0817, 2.0197, 2.1700, 2.3190,\n 2.2569, 2.1954, 2.1344, 2.0739, 2.2205, 2.1602, 2.3054, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.3529, 2.2943, 2.4348, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.4797, 2.4225, 2.5589, 2.6943,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.6014, 2.7341, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.2680, 2.3967, 2.3443, 2.2923, 2.2406, 2.1892, 2.3163,\n 2.2650, 2.3912, 2.5166, 2.4653, 2.4142, 2.3635, 2.3131, 2.4371,\n 2.3868, 2.5099, 2.6323, 2.5820, 2.5319, 2.4822, 2.4327, 2.5538,\n 2.5044, 2.6247, 2.7443, 2.6949, 2.6458, 2.5969, 2.5483, 2.6667,\n 2.6182, 2.7358, 2.8528, 2.8043, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill disappeared down the road.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.4346, 9.3408, 9.2480, 9.1561, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 9.0370, 8.9496, 9.0680, 8.9815,\n 8.8958, 8.8108, 8.7267, 8.8443, 8.7610, 8.6783, 8.5964, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.3927, 8.3143, 8.4303, 8.3525, 8.2754,\n 8.1988, 8.1229, 8.0476, 8.1628, 8.0880, 8.0139, 7.9403, 7.8673,\n 7.7949, 7.7230, 7.6517, 7.5809, 7.5106, 7.6246, 7.7380, 7.6681,\n 7.5988, 7.7114, 7.6424, 7.5740, 7.5061, 7.4386, 7.3717, 7.4833,\n 7.5944, 7.5277, 7.4615, 7.5719, 7.5061, 7.4407, 7.3758, 7.3113,\n 7.4208, 7.5297, 7.4655, 7.4017, 7.3383, 7.2753, 7.3835, 7.3208,\n 7.2585, 7.1967, 7.1352, 7.0741, 7.0133, 6.9530, 6.8930, 6.8333,\n 6.7740, 6.7151, 6.6565, 6.5983, 6.5404, 6.6469, 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe witch went into the forest by vanishing.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.5492, 1.8898, 1.7233, 2.0466, 1.8856, 2.1939, 2.0381, 2.3333,\n 2.1822, 2.4659, 2.3190, 2.1776, 2.0412, 2.3116, 2.1783, 2.0494, 1.9245,\n 2.1831, 2.4351, 2.3113, 2.1909, 2.4345, 2.6726, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.3333, 2.5568, 2.4495, 2.3445, 2.5621, 2.4585, 2.3570,\n 2.2576, 2.1602, 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.4930, 2.6914,\n 2.5981, 2.5064, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.7333, 2.6491, 2.5660, 2.4841, 2.6632, 2.8402, 2.7585,\n 2.6778, 2.8518, 3.0237, 2.9433, 2.8638, 2.7854, 2.7080, 2.6316, 2.7995,\n 2.9656, 2.8893, 2.8138, 2.9775, 2.9025, 3.0641, 2.9897, 2.9161, 2.8433,\n 3.0022, 3.1597, 3.0870, 3.0151, 2.9439, 3.0989, 3.0282, 2.9582, 2.8889,\n 2.8203, 2.9726, 3.1236, 3.0551, 2.9872, 2.9200, 2.8534, 2.7875, 2.7222,\n 2.6575, 2.8051, 2.7406, 2.6768, 2.6135, 2.7591, 2.6961, 2.8402, 2.7775,\n 2.7153, 2.8577, 2.7958, 2.9369, 2.8753, 2.8141, 2.9537, 2.8928, 3.0311,\n 2.9704, 3.1076, 3.0471, 2.9872, 2.9277, 3.0632, 3.1977, 3.1382, 3.0792,\n 3.2124, 3.3447, 3.4762, 3.4170, 3.3582, 3.2998, 3.4298, 3.5590, 3.5007,\n 3.4428, 3.5708, 3.5131, 3.6401, 3.5827, 3.5256, 3.4689, 3.5946, 3.7196,\n 3.6629, 3.6067, 3.7306, 3.8538, 3.7975, 3.7417, 3.6862, 3.6310, 3.5762,\n 3.6979, 3.8189, 3.7641, 3.7097, 3.8297, 3.7755, 3.8947, 3.8406, 3.7869,\n 3.7335, 3.8516, 3.9691, 3.9158, 3.8627, 3.9793, 4.0953, 4.0423, 3.9896,\n 3.9372, 3.8851, 3.8333, 3.9481, 4.0622, 4.0105, 3.9590, 4.0723, 4.0210,\n 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe witch went into the forest and thereby vanished.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 1.0079, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.3856, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.2808, 1.4289,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.3517, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.7780, 1.7310, 1.6843, 1.6378, 1.7592, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.7688, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe building is tall and wide.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "148", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "46.6%", + "z-score": "6.07", + "p value": "6.21e-10", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570, 2.1939, 2.4910, 2.7778,\n 3.0551, 2.8947, 3.1623, 3.0072, 2.8577, 3.1156, 2.9704, 3.2205, 3.0792,\n 3.3221, 3.1844, 3.0509, 3.2863, 3.1558, 3.3853, 3.2577, 3.4816, 3.7009,\n 3.9158, 3.7897, 3.6667, 3.5466, 3.4293, 3.6380, 3.5228, 3.7273, 3.6141,\n 3.8146, 4.0119, 4.2060, 4.0937, 4.2844, 4.1740, 4.0657, 4.2528, 4.1461,\n 4.3301, 4.2251, 4.4061, 4.5847, 4.7610, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.6981, 4.8667, 4.7683, 4.9346, 5.0990, 5.2615, 5.1640, 5.0679,\n 4.9731, 4.8797, 5.0395, 4.9472, 5.1051, 5.0138, 5.1698, 5.3243, 5.4772,\n 5.3865, 5.5377, 5.4480, 5.3594, 5.5088, 5.4212, 5.5690, 5.4822, 5.6285,\n 5.7735, 5.9172, 5.8310, 5.7457, 5.6614, 5.5780, 5.7199, 5.6373, 5.7778,\n 5.6959, 5.8351, 5.9732, 6.1101, 6.0287, 6.1644, 6.0837, 6.0038, 6.1382,\n 6.0590, 6.1923, 6.1137, 6.2459, 6.3770, 6.5072, 6.4291, 6.3517, 6.2750,\n 6.1990, 6.3278, 6.2524, 6.3803, 6.3054, 6.2312, 6.1577, 6.0848, 6.0125,\n 5.9409, 6.0671, 5.9960, 6.1213, 6.0506, 5.9805, 5.9109, 6.0351, 6.1584,\n 6.0892, 6.2116, 6.1429, 6.0746])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe building is tall and tall.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "148", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "39.2%", + "z-score": "3.99", + "p value": "3.35e-05", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142, 1.2702, 1.1323, 1.4444,\n 1.7457, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 2.0494, 1.9245,\n 1.8034, 1.6859, 1.5717, 1.8257, 1.7132, 1.9599, 1.8489, 1.7408, 1.9795,\n 2.2133, 2.1054, 2.0000, 1.8970, 1.7963, 2.0211, 1.9215, 2.1412, 2.0428,\n 1.9462, 2.1602, 2.3706, 2.2743, 2.4804, 2.3851, 2.2916, 2.4930, 2.4004,\n 2.5981, 2.5064, 2.4163, 2.6098, 2.8006, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 2.8292, 3.0071, 3.1829, 3.0984, 3.0151,\n 2.9329, 2.8518, 3.0237, 2.9433, 3.1129, 3.0330, 2.9542, 3.1211, 3.2863,\n 3.2077, 3.3708, 3.2928, 3.2157, 3.3764, 3.2998, 3.4586, 3.3826, 3.3075,\n 3.4641, 3.6193, 3.5443, 3.4702, 3.3968, 3.3243, 3.4768, 3.4047, 3.5556,\n 3.4839, 3.4130, 3.5620, 3.7097, 3.6389, 3.7852, 3.7148, 3.6452, 3.7897,\n 3.7205, 3.8636, 3.7947, 3.7265, 3.8680, 4.0085, 3.9404, 3.8730, 3.8061,\n 3.7399, 3.8784, 3.8125, 3.9497, 3.8841, 3.8191, 3.7547, 3.6908, 3.6274,\n 3.5645, 3.6995, 3.6369, 3.7707, 3.7084, 3.6466, 3.5853, 3.7176, 3.8490,\n 3.7878, 3.9181, 3.8571, 3.9865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building is taller and wider than that one.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.0751, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building got taller and wider than that one.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.5556, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.4286, -2.4678, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.7137, -2.5750, -2.6112,\n -2.6472, -2.5099, -2.3734, -2.2377, -2.2744, -2.3110, -2.3473, -2.2133,\n -2.2497, -2.1167, -1.9843, -1.8527, -1.7218, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building got taller and taller.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building is taller and taller.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "54.6%", + "z-score": "8.43", + "p value": "1.74e-17", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.3408, 9.2480, 9.1561, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.5607, 8.4788, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.5824, 8.5052, 8.4286, 8.3526, 8.2772, 8.3910, 8.3162, 8.4293])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building got than that one.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.6083, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.9165, 1.0445,\n 1.1717, 1.1251, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.7539, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "175", + "Fraction of T in Greenlist": "87.9%", + "z-score": "20.5", + "p value": "9.8e-94", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.0200, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.2746, 7.4730, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.7996, 9.9491, 10.0965, 10.2419, 10.3853, 10.5269, 10.6667,\n 10.8047, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 10.8012,\n 10.9355, 10.7650, 10.8989, 11.0313, 11.1622, 11.2918, 11.4201, 11.5470,\n 11.6727, 11.7971, 11.9203, 12.0424, 12.1633, 12.2832, 12.4019, 12.5196,\n 12.6363, 12.7520, 12.8667, 12.9804, 13.0932, 13.2052, 13.3162, 13.4263,\n 13.2791, 13.1341, 13.2448, 13.3547, 13.4638, 13.5721, 13.4320, 13.5401,\n 13.4026, 13.5105, 13.6176, 13.7240, 13.8296, 13.9345, 14.0387, 14.1421,\n 14.2449, 14.3470, 14.4484, 14.5492, 14.6494, 14.7489, 14.8478, 14.9461,\n 15.0437, 15.1408, 15.2374, 15.3333, 15.4287, 15.5236, 15.6179, 15.4935,\n 15.3705, 15.4651, 15.5592, 15.6528, 15.7459, 15.6259, 15.7189, 15.6006,\n 15.6934, 15.7858, 15.8777, 15.9691, 16.0600, 16.1504, 16.2404, 16.3299,\n 16.4190, 16.5077, 16.5959, 16.6836, 16.7710, 16.8579, 16.9444, 17.0305,\n 17.1162, 17.2016, 17.2865, 17.3710, 17.4552, 17.5390, 17.4292, 17.3205,\n 17.4045, 17.4882, 17.5714, 17.6543, 17.5477, 17.6305, 17.5251, 17.6078,\n 17.6902, 17.7722, 17.8539, 17.9353, 18.0163, 18.0970, 18.1774, 18.2574,\n 18.3371, 18.4165, 18.4956, 18.5744, 18.6529, 18.7310, 18.8089, 18.8865,\n 18.9637, 19.0407, 19.1174, 19.1938, 19.2700, 19.1707, 19.0722, 19.1485,\n 19.2246, 19.3003, 19.3758, 19.2789, 19.3543, 19.2582, 19.3336, 19.4086,\n 19.4835, 19.5580, 19.6323, 19.7064, 19.7801, 19.8537, 19.9270, 20.0000,\n 20.0728, 20.1453, 20.2177, 20.2897, 20.3616, 20.4332, 20.5046])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis building is than that one.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.9333, 1.8543, 1.7765, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.9695,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.9829, 2.1470, 2.0785, 2.0107, 1.9437, 1.8773, 2.0381,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.7465, 1.8974,\n 1.8370, 1.7772, 1.9261, 2.0739, 2.0140, 2.1602, 2.3054, 2.2454,\n 2.1858, 2.1268, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 2.1210, 2.0642, 2.2037, 2.3422, 2.2852, 2.4225, 2.3657, 2.5019,\n 2.4453, 2.3891, 2.5238, 2.4678, 2.6014, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.3262, 2.2723, 2.2188, 2.1656, 2.1128, 2.2436, 2.1909,\n 2.3206, 2.2680, 2.2159, 2.3443, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.7410, 1.8605, 1.8145, 1.7688, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "149", + "Fraction of T in Greenlist": "74.9%", + "z-score": "16.2", + "p value": "1.15e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.2296, 7.4333, 7.6317, 7.8251, 7.5425,\n 7.2746, 7.0201, 7.2222, 6.9830, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.4983, 7.3054, 7.4885, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.2424, 9.3881, 9.5321, 9.3834, 9.5263,\n 9.6676, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.3358, 10.4667, 10.5963, 10.7246, 10.5903, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.7099, 10.5830, 10.7084, 10.5837, 10.7084, 10.8321,\n 10.9546, 10.8327, 10.9546, 11.0755, 11.1954, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.8953, 11.7779, 11.8918, 12.0049, 12.1171, 12.2286,\n 12.1139, 12.0005, 12.1118, 12.2222, 12.1107, 12.2207, 12.3299, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.0656, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.1657, 13.0643, 13.1657, 13.0655, 13.1665, 13.2669, 13.3667, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.6626, 13.7599, 13.8567, 13.9531, 14.0489,\n 14.1442, 14.0479, 14.1429, 14.2374, 14.3314, 14.4250, 14.3302, 14.2361,\n 14.3295, 14.4225, 14.3295, 14.4222, 14.5144, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.7877, 14.8779, 14.9677, 15.0571, 14.9669, 15.0560, 15.1448,\n 15.2332, 15.1440, 15.0555, 15.1438, 15.2316, 15.3191, 15.2316, 15.1448,\n 15.2321, 15.1460, 15.2331, 15.3198, 15.4062, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.6633, 15.7481, 15.8325, 15.9165, 16.0002, 16.0836, 16.0000,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.2481, 16.1660, 16.2481])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill floated into the cave.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.9127, 1.8378, 2.0158, 1.9413, 2.1167, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.9973, 1.9262, 1.8559, 2.0247, 1.9548, 2.1213,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.0107, 1.9437, 2.1049, 2.0381,\n 1.9720, 2.1309, 2.0651, 2.2222, 2.1567, 2.3120, 2.2468, 2.1822,\n 2.3354, 2.4874, 2.6381, 2.5731, 2.5087, 2.6575, 2.5934, 2.7406,\n 2.6768, 2.8226, 2.7591, 2.6961, 2.6336, 2.5717, 2.7153, 2.6536,\n 2.5925, 2.7344, 2.6735, 2.8141, 2.7534, 2.8928, 2.8324, 2.7724,\n 2.9103, 3.0471, 3.1831, 3.1229, 3.0632, 3.1977, 3.1382, 3.2717,\n 3.2124, 3.3447, 3.2857, 3.2271, 3.1690, 3.1113, 3.2419, 3.1844,\n 3.1273, 3.2567, 3.1998, 3.3282, 3.2715, 3.3989, 3.3424, 3.2863,\n 3.4126, 3.5382, 3.6629, 3.6067, 3.5508, 3.6745, 3.6188, 3.7417,\n 3.6862, 3.8081, 3.7528, 3.6979, 3.8189, 3.9392, 4.0589, 4.0038,\n 3.9491, 4.0678, 4.0132, 4.1312, 4.0768, 4.1940, 4.1399, 4.0860,\n 4.2023, 4.3180, 4.4331, 4.3792, 4.3256, 4.4399, 4.3864, 4.5000,\n 4.4468, 4.5596, 4.5066, 4.4538, 4.5659, 4.6775, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "185", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "69.2%", + "z-score": "13.9", + "p value": "4.17e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.1551, 9.3081, 9.4589, 9.2874, 9.4373, 9.5852, 9.7312, 9.5668,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.6838, 9.8254, 9.9653, 10.1036,\n 9.9540, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.0701,\n 10.2036, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.7179, 10.8444,\n 10.9697, 10.8388, 10.9634, 11.0870, 11.2094, 11.3308, 11.4512, 11.3244,\n 11.4442, 11.5630, 11.4388, 11.5570, 11.6743, 11.7907, 11.9062, 12.0208,\n 11.9001, 12.0142, 11.8953, 12.0089, 11.8918, 12.0049, 12.1171, 12.2286,\n 12.3393, 12.2248, 12.1118, 12.2222, 12.1107, 12.0005, 12.1107, 12.0020,\n 12.1117, 12.0044, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.2414, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.3586, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.2834, 13.3810, 13.2882, 13.3854, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.6781, 13.5876, 13.4977, 13.5929, 13.5039, 13.4155, 13.5105,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.8120, 13.9042, 13.8193, 13.9111, 14.0025, 13.9185, 14.0096, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.1149, 14.0329, 13.9515, 13.8707, 13.7904,\n 13.8804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill floated into the cave for hours.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.6590, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 2.0738, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.2576, 2.1602,\n 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 2.0207,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.1546, 2.3462, 2.5352, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.5660, 2.4841, 2.6632, 2.5820,\n 2.7585, 2.9329, 2.8518, 2.7717, 2.9433, 2.8638, 3.0330, 3.2004,\n 3.1211, 3.0429, 2.9656, 2.8893, 3.0533, 2.9775, 2.9025, 2.8284,\n 2.7552, 2.9161, 2.8433, 3.0022, 2.9299, 3.0870, 3.0151, 2.9439,\n 3.0989, 3.0282, 2.9582, 2.8889, 2.8203, 2.9726, 2.9044, 2.8368,\n 2.7699, 2.7037, 2.8534, 2.7875, 2.9357, 3.0827, 3.0168, 2.9515,\n 3.0967, 3.2408, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.2025, 3.3420, 3.2788, 3.4171, 3.5544, 3.4913, 3.4286, 3.5645,\n 3.5022, 3.6369, 3.7707, 3.7084, 3.8411, 3.7791, 3.7176, 3.8490,\n 3.7878, 3.7270, 3.6667, 3.6068, 3.7366, 3.6770, 3.8057, 3.7463,\n 3.8741, 3.8150, 3.7563, 3.8829, 3.8244, 3.7664, 3.7087, 3.6515,\n 3.7766, 3.7196, 3.6629, 3.6067, 3.5508, 3.6745, 3.6188, 3.7417,\n 3.8638, 3.8081, 3.7528, 3.8740, 3.9945, 4.1143, 4.0589, 4.0038,\n 3.9491, 3.8947, 4.0132, 3.9590, 4.0768, 4.0228, 4.1399, 4.2563,\n 4.2023, 4.1487, 4.2642, 4.2108, 4.3256, 4.4399, 4.3864, 4.3333,\n 4.2805, 4.2280, 4.3412, 4.2889, 4.2369, 4.1851, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "180", + "Fraction of T in Greenlist": "90.5%", + "z-score": "21.3", + "p value": "3.46e-101", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.7438, 8.9113, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.7996, 9.9491, 10.0965, 10.2419, 10.3853, 10.5269, 10.3333,\n 10.4748, 10.6145, 10.7524, 10.8887, 11.0234, 11.1566, 11.2882, 11.4184,\n 11.2414, 11.3715, 11.5002, 11.6276, 11.7536, 11.8784, 12.0020, 12.1244,\n 12.2456, 12.3656, 12.2025, 12.3225, 12.4414, 12.5592, 12.6760, 12.7918,\n 12.9066, 13.0204, 13.1333, 12.9804, 13.0932, 13.2052, 13.3162, 13.4263,\n 13.5357, 13.6441, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.1370, 14.2408, 14.3439, 14.4463, 14.5480, 14.6491, 14.7495, 14.8492,\n 14.9484, 14.8136, 14.9127, 15.0111, 15.1090, 15.2062, 15.3029, 15.3990,\n 15.4945, 15.5895, 15.6839, 15.7778, 15.8711, 15.9640, 15.8371, 15.9299,\n 16.0222, 16.1140, 16.2053, 16.2961, 16.3864, 16.4763, 16.5657, 16.4438,\n 16.5332, 16.6221, 16.7106, 16.7986, 16.8862, 16.9734, 17.0601, 17.1464,\n 17.2323, 17.3178, 17.4029, 17.4877, 17.3717, 17.4564, 17.5407, 17.6246,\n 17.7082, 17.7913, 17.8741, 17.9566, 18.0386, 17.9266, 18.0086, 18.0903,\n 18.1717, 18.2527, 18.3333, 18.4137, 18.4937, 18.5733, 18.6527, 18.7317,\n 18.8104, 18.7027, 18.7814, 18.8598, 18.9379, 19.0156, 19.0931, 19.1703,\n 19.0652, 19.1423, 19.2192, 19.2957, 19.3720, 19.4480, 19.5237, 19.5992,\n 19.6743, 19.7492, 19.8238, 19.8982, 19.9723, 20.0461, 20.1197, 20.0189,\n 20.0925, 20.1658, 20.2388, 20.3116, 20.3842, 20.4565, 20.3579, 20.4302,\n 20.5022, 20.5740, 20.6456, 20.7169, 20.7880, 20.8589, 20.9296, 21.0000,\n 21.0702, 21.1402, 21.0446, 21.1145, 21.1843, 21.2538, 21.3231])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill pushed Harry off the sofa for hours.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.5483, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.1613, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "170", + "Fraction of T in Greenlist": "85.4%", + "z-score": "19.7", + "p value": "1.42e-86", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.4748, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.1633, 12.2832, 12.4019, 12.5196,\n 12.6363, 12.7520, 12.8667, 12.9804, 13.0932, 12.9437, 13.0564, 13.1681,\n 13.2791, 13.3891, 13.4983, 13.6067, 13.7143, 13.8211, 13.6796, 13.7862,\n 13.8922, 13.9974, 14.1018, 14.2055, 14.3086, 14.4109, 14.5125, 14.3778,\n 14.4794, 14.5803, 14.6805, 14.7802, 14.8792, 14.9775, 15.0753, 15.1725,\n 15.0437, 15.1408, 15.2374, 15.3333, 15.4287, 15.5236, 15.6179, 15.7117,\n 15.8049, 15.6814, 15.7746, 15.8673, 15.9594, 16.0511, 16.1423, 16.2330,\n 16.3233, 16.4130, 16.2941, 16.3838, 16.4731, 16.5619, 16.6503, 16.7382,\n 16.8257, 16.9127, 16.9994, 17.0856, 17.1715, 17.0574, 16.9444, 17.0305,\n 17.1162, 17.2016, 17.2865, 17.3710, 17.2607, 17.3452, 17.2361, 17.3205,\n 17.2127, 17.2970, 17.3810, 17.4645, 17.5477, 17.6305, 17.7130, 17.7951,\n 17.8769, 17.9583, 17.8539, 17.9353, 18.0163, 18.0970, 18.1774, 18.2574,\n 18.3371, 18.2351, 18.3147, 18.2137, 18.2933, 18.1933, 18.2728, 18.3519,\n 18.4308, 18.5094, 18.5876, 18.6656, 18.7432, 18.8206, 18.8977, 18.8004,\n 18.8774, 18.9541, 19.0306, 19.1067, 19.1826, 19.2582, 19.1629, 19.2384,\n 19.1439, 19.2194, 19.1257, 19.2011, 19.2762, 19.3511, 19.4257, 19.5000,\n 19.5741, 19.6479, 19.7215, 19.7949, 19.7034, 19.6126, 19.6860])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill floated down the river for hours.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.3333,\n 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.6713, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.7928, 2.7005, 2.8919, 2.8006, 2.7107, 2.8983, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 3.0924, 3.0071, 3.1829, 3.0984,\n 3.2717, 3.4429, 3.3587, 3.2757, 3.4442, 3.3619, 3.5282, 3.6927,\n 3.6107, 3.5298, 3.4498, 3.3708, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.3826, 3.3075, 3.4641, 3.3895, 3.5443, 3.4702, 3.3968,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.4839, 3.6332, 3.5620, 3.4915,\n 3.4217, 3.3526, 3.4995, 3.4308, 3.5762, 3.7205, 3.6519, 3.5839,\n 3.7265, 3.8680, 4.0085, 3.9404, 3.8730, 3.8061, 3.7399, 3.8784,\n 3.8125, 3.9497, 3.8841, 4.0202, 4.1552, 4.0898, 4.0249, 4.1586,\n 4.0941, 4.2267, 4.3583, 4.2940, 4.4246, 4.3605, 4.2970, 4.4264,\n 4.3631, 4.3004, 4.2381, 4.1763, 4.3042, 4.2426, 4.3695, 4.3083,\n 4.4342, 4.3733, 4.3128, 4.4376, 4.3774, 4.3176, 4.2582, 4.3818,\n 4.5047, 4.4454, 4.3865, 4.3280, 4.2699, 4.3915, 4.3336, 4.4544,\n 4.5744, 4.5166, 4.4593, 4.5783, 4.6968, 4.8146, 4.7572, 4.7001,\n 4.6434, 4.5871, 4.7037, 4.6476, 4.7635, 4.7076, 4.8227, 4.9373,\n 4.8815, 4.8260, 4.9397, 4.8845, 4.9975, 5.1100, 5.0548, 5.0000,\n 4.9455, 4.8913, 5.0027, 4.9487, 4.8950, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "180", + "Fraction of T in Greenlist": "90.5%", + "z-score": "21.3", + "p value": "3.46e-101", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 8.2923, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.7438, 8.9113, 9.0759, 9.2376, 9.3967, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.7996, 9.9491, 10.0965, 10.2419, 10.3853, 10.5269, 10.6667,\n 10.8047, 10.9411, 10.7524, 10.8887, 11.0234, 11.1566, 11.2882, 11.4184,\n 11.5473, 11.6747, 11.8008, 11.6276, 11.7536, 11.8784, 12.0020, 12.1244,\n 12.2456, 12.3656, 12.4846, 12.6025, 12.7194, 12.8352, 12.9501, 13.0639,\n 12.9066, 13.0204, 13.1333, 13.2453, 13.3564, 13.4666, 13.5760, 13.6845,\n 13.7923, 13.6441, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.3818, 14.4842, 14.5860, 14.6871, 14.7875, 14.8873, 14.7495, 14.8492,\n 14.9484, 15.0469, 15.1448, 15.2420, 15.3387, 15.4349, 15.5304, 15.3990,\n 15.4945, 15.5895, 15.6839, 15.7778, 15.8711, 15.9640, 16.0563, 16.1481,\n 16.2395, 16.3303, 16.4206, 16.5105, 16.3864, 16.4763, 16.5657, 16.6547,\n 16.7432, 16.8312, 16.9188, 17.0060, 17.0928, 16.9734, 17.0601, 17.1464,\n 17.2323, 17.3178, 17.4029, 17.4877, 17.5720, 17.6559, 17.7395, 17.8227,\n 17.9055, 17.9879, 17.8741, 17.9566, 18.0386, 18.1204, 18.2017, 18.2828,\n 18.3634, 18.4438, 18.5238, 18.4137, 18.4937, 18.5733, 18.6527, 18.7317,\n 18.8104, 18.8888, 18.9669, 19.0447, 19.1222, 19.1994, 19.2763, 19.3529,\n 19.2472, 19.3238, 19.4001, 19.4761, 19.5518, 19.6272, 19.7024, 19.7773,\n 19.8520, 19.7492, 19.8238, 19.8982, 19.9723, 20.0461, 20.1197, 20.1930,\n 20.2661, 20.3389, 20.4115, 20.4838, 20.5559, 20.6277, 20.5286, 20.6004,\n 20.6720, 20.7434, 20.8145, 20.8854, 20.9560, 21.0265, 21.0967, 21.0000,\n 21.0702, 21.1402, 21.2099, 21.2795, 21.3488, 21.2538, 21.3231])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill floated down the river.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill pushed Harry along the trail for hours.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.15", + "p value": "0.875", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill pushed Harry along the trail.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.608", + "p value": "0.271", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.8083, 0.6794, 0.5556,\n 0.4364, 0.3216, 0.6325, 0.5185, 0.8165, 1.1055, 0.9901, 0.8783, 1.1547,\n 1.4237, 1.3112, 1.2019, 1.0954, 0.9918, 0.8909, 0.7924, 0.6963, 0.9467,\n 1.1918, 1.0948, 1.0000, 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857,\n 0.7006, 0.6172, 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547,\n 0.5774, 0.7877, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746, 0.7057,\n 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385, 0.6732, 0.6086,\n 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357, 0.4103, 0.3499, 0.5222,\n 0.4619, 0.6319, 0.5717, 0.5120, 0.6794, 0.6198, 0.5608, 0.5023, 0.4444,\n 0.6083])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 4.1003, 4.2601, 4.4182, 4.5747, 4.4901, 4.6448, 4.5611, 4.7140,\n 4.8655, 5.0156, 5.1643, 5.0807, 5.2278, 5.3736, 5.5181, 5.4349,\n 5.3526, 5.2713, 5.4140, 5.3333, 5.2535, 5.3947, 5.3156, 5.2372,\n 5.3769, 5.5155, 5.6530, 5.7894, 5.7112, 5.8464, 5.9806, 6.1137,\n 6.2459, 6.1680, 6.0908, 6.2217, 6.3517, 6.4807, 6.6089, 6.7361,\n 6.8624, 6.9879, 7.1125, 7.0353, 7.1590, 7.0823, 7.2051, 7.1291,\n 7.2510, 7.3721, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 7.8905,\n 7.8153, 7.7407, 7.8571, 7.9729, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.3550, 8.4674, 8.5792, 8.5054, 8.4322, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.8611, 9.9642, 9.8918, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.2050, 10.3038, 10.4021, 10.3333,\n 10.4312, 10.3628, 10.4603, 10.5573, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe road zigzagged down the hill.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "34.7%", + "z-score": "3.15", + "p value": "0.000812", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428, 0.8083, 1.1323, 1.0000,\n 1.3093, 1.1793, 1.0541, 0.9333, 1.2247, 1.1055, 0.9901, 0.8783, 1.1547,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.7924, 1.0445, 0.9467,\n 1.1918, 1.0948, 1.0000, 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857,\n 1.0120, 1.2344, 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456,\n 0.8660, 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.3641, 1.2910, 1.4755,\n 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.7951, 1.7233, 1.6524, 1.5823,\n 1.7552, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856, 1.8173, 1.9829, 2.1470,\n 2.0785, 2.0107, 1.9437, 1.8773, 1.8116, 1.7467, 1.6823, 1.8419, 2.0000,\n 1.9355, 2.0918, 2.0276, 2.1822, 2.1182, 2.0548, 1.9920, 2.1442, 2.2952,\n 2.2323, 2.3817, 2.3190, 2.2569, 2.1954, 2.3426, 2.2813, 2.4271, 2.3660,\n 2.3054, 2.2454, 2.3891, 2.5318, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348,\n 2.5744, 2.5156, 2.6540, 2.5954, 2.5373, 2.4797, 2.6163, 2.7520, 2.6943,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.7906, 2.9227, 2.8660, 2.8098, 2.7539,\n 2.6984, 2.8288, 2.9584, 2.9029, 3.0315, 2.9761, 2.9212, 2.8666, 2.9938,\n 3.1203, 3.0657, 3.0114, 2.9575, 2.9040, 3.0290, 3.1532, 3.0997, 3.2230,\n 3.1696, 3.1166, 3.0638, 3.1860, 3.1334, 3.0811, 3.0292, 2.9776, 2.9263,\n 3.0469, 3.1669, 3.1156, 3.2348, 3.1836, 3.1327, 3.0821, 3.2002, 3.1497,\n 3.0995, 3.0496, 3.0000, 2.9507, 3.0674, 3.1836, 3.1342, 3.2496, 3.2004,\n 3.1514])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.1326, 5.0190, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.0711,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.4233, 7.5556, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.2733, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.1615, 9.0773, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.2554, 9.1735, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.4581, 11.3837, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe rope stretched over the pulley.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.29", + "p value": "0.000508", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.3926, 1.2910, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.5068, 1.7285, 1.9462, 1.8516,\n 2.0647, 1.9711, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.9604, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 2.0135, 1.9333, 1.8543, 2.0397, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.3448, 2.5198, 2.4423, 2.6148, 2.5378, 2.7080,\n 2.6316, 2.5560, 2.4814, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.4495, 2.3791, 2.3094, 2.4703, 2.6296, 2.5600, 2.7175,\n 2.6481, 2.8039, 2.7349, 2.6667, 2.5991, 2.7524, 2.6852, 2.8368,\n 2.7699, 2.9200, 2.8534, 3.0019, 3.1492, 3.0827, 3.0168, 2.9515,\n 2.8868, 2.8226, 2.7591, 2.6961, 2.8402, 2.9832, 2.9202, 3.0619,\n 2.9991, 3.1395, 3.0770, 3.0151, 2.9537, 3.0923, 3.0311, 3.1685,\n 3.1076, 3.2437, 3.1831, 3.3181, 3.4521, 3.3915, 3.3314, 3.2717,\n 3.2124, 3.1536, 3.0952, 3.0373, 3.1690, 3.2998, 3.2419, 3.3717,\n 3.3140, 3.4428, 3.3853, 3.3282, 3.2715, 3.3989, 3.3424, 3.2863])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe weights stretched the rope over the pulley.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "24.5%", + "z-score": "-0.137", + "p value": "0.555", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -0.6472, -0.3651,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.6383, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe weights kept the rope stretched over the pulley.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.4565, -1.4985,\n -1.3536, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.1167, -2.1532, -2.0212, -2.0578, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nSam cut himself free.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "29.4%", + "z-score": "1.39", + "p value": "0.0818", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.8173, 1.9829, 1.9149, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.7467, 1.9066, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.9920, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.9261, 2.0739, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.7619, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.7158, 1.8490, 1.9813, 1.9291, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.9327, 2.0613, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nSam got free by cutting his finger.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.7086, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.1131, -2.1509, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill cried himself to sleep.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill cried Sue to sleep.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.7321, -1.5681, -1.6138, -1.4517, -1.4976, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill squeezed himself through the hole.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill sang himself to sleep.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.0701,\n 9.9333, 10.0673, 9.9333, 9.8015, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.3459, 10.2283, 10.1124, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.5363, 10.4304, 10.3257, 10.2222, 10.3411, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.7211, 10.8353, 10.9488, 10.8498, 10.7517,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.5966, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.5471, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.6029, 11.5217, 11.4411, 11.5414, 11.4614, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.4109, 12.3342, 12.2581,\n 12.3523, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill squeezed the puppet through the hole.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "28.3%", + "z-score": "0.722", + "p value": "0.235", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.7223])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill sang Sue to sleep.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.4901, 4.6448, 4.5611, 4.4783,\n 4.6311, 4.7823, 4.7001, 4.8497, 4.9980, 5.1450, 5.2906, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.3333, 5.4747, 5.3947, 5.5348, 5.6737,\n 5.8114, 5.7318, 5.8684, 6.0038, 6.1382, 6.0590, 6.1923, 6.3246,\n 6.2459, 6.1680, 6.2990, 6.2217, 6.3517, 6.4807, 6.6089, 6.5320,\n 6.6591, 6.7854, 6.9107, 6.8343, 6.9587, 7.0823, 7.0063, 6.9310,\n 7.0537, 6.9789, 7.1007, 7.2217, 7.3419, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.5495, 7.6667, 7.7831, 7.7096, 7.6368, 7.7524, 7.6800,\n 7.7949, 7.9091, 8.0227, 7.9507, 8.0636, 8.1758, 8.2874, 8.2158,\n 8.3268, 8.4371, 8.3660, 8.2954, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.6603, 8.5905, 8.6978, 8.8045, 8.9107, 8.8413, 8.9469, 9.0520,\n 8.9830, 8.9145, 9.0190, 8.9509, 9.0549, 9.1584, 9.2613, 9.1936,\n 9.2960, 9.3980, 9.4995, 9.4321, 9.5331, 9.6336, 9.5666, 9.5000,\n 9.6000, 9.5338, 9.6334, 9.7325, 9.8311, 9.7653, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe elevator rumbled itself to the ground.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.7589, 1.9711, 1.8791, 2.0870, 2.2916, 2.1997, 2.1094, 2.0207,\n 1.9335, 2.1320, 2.0455, 2.2404, 2.1546, 2.3462, 2.2611, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 2.2453, 2.4228, 2.3448, 2.5198, 2.4423, 2.6148, 2.5378, 2.7080,\n 2.8764, 2.7995, 2.9656, 3.1300, 3.0533, 2.9775, 2.9025, 2.8284,\n 2.9897, 2.9161, 3.0754, 3.0022, 3.1597, 3.0870, 3.2426, 3.3968,\n 3.3243, 3.2525, 3.1814, 3.1111, 3.0415, 2.9726, 2.9044, 3.0551,\n 3.2044, 3.1363, 3.2841, 3.2163, 3.3627, 3.2953, 3.4402, 3.5839,\n 3.5166, 3.6590, 3.8002, 3.7330, 3.6664, 3.6004, 3.5350, 3.6742,\n 3.6091, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 3.8335, 3.7707, 3.7084, 3.6466, 3.5853, 3.7176, 3.8490,\n 3.7878, 3.9181, 3.8571, 3.9865, 3.9258, 4.0541, 4.1816, 4.1210,\n 4.2475, 4.3733, 4.3128, 4.2527, 4.1931, 4.1338, 4.2582, 4.1992,\n 4.3226, 4.2639, 4.3865, 4.5083, 4.4497, 4.5707, 4.5123, 4.4544,\n 4.3967, 4.3395, 4.2827, 4.2262, 4.1700, 4.2893, 4.4080, 4.3519,\n 4.4698, 4.4140, 4.5311, 4.4754, 4.5918, 4.7076, 4.6520, 4.7670,\n 4.8815, 4.8260, 4.7709, 4.7161, 4.6616, 4.7749, 4.7206, 4.8333,\n 4.7792, 4.8913, 4.8374, 4.7838, 4.8950, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nIf the telephone rang, it could ring itself silly.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.2981, 5.1978, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.7181, 4.6262, 4.5356, 4.4462, 4.6070, 4.7662, 4.9237,\n 5.0795, 5.2338, 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.3086, 4.2303, 4.1528, 4.3027,\n 4.2258, 4.1497, 4.0745, 4.0000, 4.1475, 4.2938, 4.2196, 4.1461,\n 4.0734, 4.0015, 3.9302, 3.8596, 3.7897, 3.7205, 3.6519, 3.5839,\n 3.7265, 3.6590, 3.8002, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.6181, 3.5544, 3.4913, 3.4286, 3.3665,\n 3.3049, 3.4403, 3.3789, 3.3181, 3.2577, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.1690, 3.1113, 3.0540, 2.9971,\n 2.9406, 2.8845, 3.0143, 2.9584, 3.0872, 3.0315, 2.9761, 3.1038,\n 3.2306, 3.1753, 3.1203, 3.2460, 3.1912, 3.1368, 3.0827, 3.0290,\n 2.9756, 2.9225, 2.8698, 2.8174, 2.7654, 2.7137, 2.6623, 2.7852,\n 2.7340, 2.6830, 2.6323, 2.5820, 2.7036, 2.8245, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.4520, 2.4042, 2.5220, 2.4744, 2.4269, 2.5439, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.3630, 10.4738, 10.3853, 10.2975,\n 10.2106, 10.1243, 10.0389, 9.9542, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.1429, 10.2509, 10.1692, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.1968, 10.1189, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.0231, 9.9481, 9.8736, 9.7996,\n 9.7261, 9.6532, 9.5808, 9.6850, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.8064, 10.7363, 10.8333,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nShe yelled hoarse.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "144", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "58.3%", + "z-score": "9.24", + "p value": "1.26e-20", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.4346, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.7622, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.6471, 9.5638, 9.4812, 9.3993, 9.3181, 9.2376])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.8000, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 7.8905, 8.0335, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 9.7986, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.9813, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.0698, 10.1840, 10.0926, 10.0021,\n 10.1157, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.6412, 11.7405, 11.8393, 11.9377,\n 11.8579, 11.7787, 11.8766, 11.7980, 11.7200, 11.8176, 11.9147, 12.0114,\n 12.1076, 12.0302, 11.9534, 12.0493, 11.9730, 11.8973, 11.9928, 11.9176,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.0529, 12.1468, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nTed cried to sleep.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.7143, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.0701,\n 9.9333, 10.0673, 9.9333, 9.8015, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.3459, 10.2283, 10.1124, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.5363, 10.4304, 10.3257, 10.2222, 10.3411, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.7211, 10.8353, 10.9488, 10.8498, 10.7517,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.5966, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.5471, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.6029, 11.5217, 11.4411, 11.5414, 11.4614, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.4109, 12.3342, 12.2581,\n 12.3523, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe tiger bled to death.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.2644, -2.3126, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.5796, -2.4000, -2.4444, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.6961, -2.5303, -2.5717, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.5342, -2.3764,\n -2.4170, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.3891, -2.2381, -2.2780, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.0292, -2.8913, -2.9263, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHe coughed awake and we were all overjoyed, especially Sierra.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 0.8729,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 1.0139, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.0096, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 0.9870, 0.9415, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.2244, 1.1790, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.4546, 1.4093, 1.5298, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.2281, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.9969, 10.1368, 9.9795, 9.8254, 9.6743, 9.5263,\n 9.6676, 9.5230, 9.3811, 9.5219, 9.6612, 9.5229, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 10.9634, 10.8350, 10.7084, 10.5837, 10.7084, 10.8321,\n 10.9546, 11.0762, 11.1967, 11.0755, 11.1954, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.8953, 12.0089, 12.1216, 12.2336, 12.3447, 12.4550,\n 12.3393, 12.2248, 12.1118, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.2768, 13.3789, 13.4804, 13.5813, 13.6816, 13.5764, 13.4722,\n 13.3690, 13.2669, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.6640,\n 13.7621, 13.8595, 13.9565, 14.0530, 14.1489, 14.2443, 14.3393, 14.4338,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.7098, 14.6135, 14.5181, 14.4234,\n 14.5162, 14.6086, 14.7005, 14.7920, 14.8831, 14.7899, 14.8807, 14.9711,\n 15.0610, 15.1505, 15.2397, 15.3284, 15.4167, 15.5046, 15.5922, 15.6793,\n 15.7661, 15.8525, 15.7619, 15.6720, 15.5828, 15.4942, 15.5808, 15.6670,\n 15.7529, 15.8384, 15.9235, 15.8362, 15.9211, 16.0057, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.5884, 16.6704, 16.7520, 16.8333,\n 16.7481, 16.6634, 16.5793, 16.4957, 16.5772, 16.6584, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn coughed awake, rubbing his nose and cursing under his breath.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "188", + "Fraction of T in Greenlist": "94.5%", + "z-score": "22.6", + "p value": "1.03e-113", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.9969, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.3293, 11.4551, 11.5797, 11.7031,\n 11.8254, 11.9466, 12.0667, 12.1857, 12.3037, 12.4207, 12.5367, 12.6517,\n 12.7659, 12.8790, 12.9913, 13.1028, 13.2133, 13.3231, 13.4320, 13.5401,\n 13.6474, 13.7539, 13.8597, 13.9648, 14.0691, 14.1727, 14.2756, 14.3778,\n 14.4794, 14.5803, 14.6805, 14.7802, 14.8792, 14.9775, 15.0753, 15.1725,\n 15.2691, 15.3652, 15.4606, 15.5556, 15.6499, 15.7438, 15.8371, 15.9299,\n 16.0222, 16.1140, 16.2053, 16.2961, 16.3864, 16.4763, 16.5657, 16.6547,\n 16.7432, 16.8312, 16.9188, 17.0060, 17.0928, 17.1791, 17.2650, 17.3506,\n 17.4357, 17.5204, 17.6047, 17.6887, 17.7722, 17.8554, 17.9382, 18.0207,\n 18.1028, 18.1845, 18.2659, 18.3469, 18.4276, 18.5080, 18.5880, 18.6677,\n 18.7470, 18.8260, 18.9048, 18.9832, 19.0612, 19.1390, 19.2165, 19.2937,\n 19.3705, 19.4471, 19.5234, 19.5994, 19.6751, 19.7506, 19.8257, 19.9006,\n 19.9752, 20.0495, 20.1236, 20.1974, 20.2709, 20.3442, 20.4173, 20.4900,\n 20.5626, 20.6348, 20.7069, 20.7786, 20.8502, 20.9215, 20.9926, 21.0634,\n 21.1340, 21.2044, 21.2745, 21.3444, 21.4141, 21.4836, 21.5529, 21.6219,\n 21.6907, 21.7594, 21.8278, 21.8960, 21.9639, 22.0317, 22.0993, 22.1667,\n 22.2338, 22.3008, 22.3676, 22.4342, 22.5006, 22.5668, 22.6328])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn coughed himself awake on the bank of the lake where he and Bill had their play.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.9238, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.2060, 1.1514, 1.0973, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.1183, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.2257, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.1991, 1.3284, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "138", + "# Tokens in Greenlist": "81", + "Fraction of T in Greenlist": "58.7%", + "z-score": "9.14", + "p value": "3.08e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735, 6.0212, 6.2598,\n 6.4902, 6.1968, 5.9214, 6.1546, 5.8989, 5.6569, 5.4271, 5.2085, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 5.3072, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.6000, 5.8068, 6.0093, 6.2075, 6.4019, 6.5924, 6.4273, 6.2668, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 6.6953, 6.5485, 6.4051, 6.2651, 6.1283,\n 6.3058, 6.1721, 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 6.0125, 6.1828,\n 6.0622, 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132, 6.8641,\n 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855, 7.5275, 7.6681,\n 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139, 8.1483, 8.2816, 8.1816,\n 8.3138, 8.2151, 8.3463, 8.2488, 8.1524, 8.0571, 8.1873, 8.0931, 8.0000,\n 7.9079, 7.8168, 7.9460, 7.8558, 7.9839, 8.1111, 8.0219, 8.1481, 8.0598,\n 8.1850, 8.0976, 8.2219, 8.1354, 8.0497, 7.9649, 7.8808, 8.0042, 8.1266,\n 8.2483, 8.3691, 8.2858, 8.4057, 8.5249, 8.6433, 8.5607, 8.6783, 8.7952,\n 8.9113, 9.0267, 9.1414])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nRon yawned himself awake.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.6681, 2.8943, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.3630, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.6487, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.0824, 12.1805, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nShe coughed herself awake as the leaf landed on her nose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "136", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "39.0%", + "z-score": "3.76", + "p value": "8.41e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.8301, 3.0792, 2.9424, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.2796, 3.1787, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.8919, 3.0806, 3.2667, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.4438, 3.6187, 3.5301, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.3587, 3.5277, 3.4442, 3.3619, 3.2806, 3.2004,\n 3.1211, 3.0429, 2.9656, 2.8893, 3.0533, 2.9775, 3.1394, 3.2998,\n 3.4586, 3.3826, 3.3075, 3.2332, 3.3895, 3.3156, 3.4702, 3.6233,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.4130, 3.5620, 3.4915,\n 3.4217, 3.3526, 3.2841, 3.2163, 3.1492, 3.0827, 3.0168, 2.9515,\n 3.0967, 3.2408, 3.3838, 3.5256, 3.4599, 3.3947, 3.3301, 3.4701,\n 3.4058, 3.5446, 3.6824, 3.6181, 3.5544, 3.4913, 3.6274, 3.7626])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "69.3%", + "z-score": "10.3", + "p value": "4.19e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.3557, 8.1763, 8.0018, 7.8320, 7.6667,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.6719, 9.8058, 9.9384, 10.0698,\n 9.9433, 10.0737, 10.2030, 10.0791, 9.9570, 9.8367, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.4932, 10.3812, 10.2706, 10.1614, 10.2833])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe worm wriggled onto the carpet.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.8006, 0.9567, 0.8997, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.2443, 1.3943, 1.3373, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.2423, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.4792, 1.6127, 1.5617, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.7310, 1.8527, 1.8058, 1.9267, 1.8799, 2.0000,\n 2.1195, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.5258, 7.4316, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.4233, 7.3333, 7.4655, 7.3765, 7.5076, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.4193, 7.3346, 7.2508, 7.1678,\n 7.2956, 7.4225, 7.3402, 7.4661, 7.5910, 7.5094, 7.6335, 7.5526,\n 7.6758, 7.5955, 7.7178, 7.6383, 7.5595, 7.4813, 7.4039, 7.3271,\n 7.4483, 7.5687, 7.4924, 7.4168, 7.5364, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.5495, 7.6667, 7.5933, 7.5204, 7.4482, 7.3765, 7.3054,\n 7.4215, 7.5369, 7.6517, 7.5809, 7.6950, 7.6246, 7.7380, 7.6681,\n 7.7808, 7.7114, 7.8233, 7.7544, 7.6859, 7.6179, 7.5504, 7.4833,\n 7.5944, 7.7048, 7.8147, 7.9241, 8.0328, 7.9659, 7.8995, 8.0076,\n 7.9415, 7.8759, 7.8107, 7.7460, 7.8533, 7.9601, 8.0663, 8.1721,\n 8.1075, 8.2127, 8.1485, 8.2531, 8.3572, 8.4608, 8.5640, 8.6667,\n 8.7689, 8.8706, 8.9718, 9.0726, 9.0085, 9.1088, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe chocolate melted onto the carpet.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 0.7581, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 0.8926, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.9631, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.8540, 0.8066, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.9165, 0.8704,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.9415, 1.0670, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe ball wriggled itself loose.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "12.6%", + "z-score": "-4.05", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.3619, -3.4044, -3.4466,\n -3.4883, -3.5298, -3.5708, -3.6116, -3.6520, -3.6920, -3.7318, -3.7712,\n -3.8104, -3.8492, -3.8877, -3.9260, -3.9639, -3.7730, -3.8115, -3.8497,\n -3.8877, -3.9254, -3.9628, -4.0000, -4.0369, -3.8534, -3.8908, -3.7097,\n -3.7476, -3.7852, -3.8225, -3.8596, -3.8965, -3.9331, -3.9694, -4.0056,\n -3.8315, -3.8680, -3.9043, -3.7330, -3.7697, -3.8061, -3.8424, -3.8784,\n -3.9141, -3.9497, -3.9850, -4.0202, -4.0551, -4.0898, -4.1243, -4.1586,\n -3.9954, -4.0301, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -4.1429, -4.1763, -4.2096, -4.2426, -4.2756, -4.3083,\n -4.3409, -4.3733, -4.2200, -4.2527, -4.2852, -4.3176, -4.3498, -4.3818,\n -4.2316, -4.0825, -4.1152, -4.1477, -4.1800, -4.0330, -3.8869, -3.9198,\n -3.9526, -3.8081, -3.6645, -3.6979, -3.7311, -3.7641, -3.6224, -3.6556,\n -3.6887, -3.5485, -3.5817, -3.6148, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.8739, -3.9056, -3.9372, -3.9687, -4.0000,\n -4.0312, -4.0622, -4.0931, -4.1239, -3.9900, -4.0210, -4.0518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "120", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "79.2%", + "z-score": "13.7", + "p value": "4.86e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 8.8121, 8.9672, 9.1201, 9.2710, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 10.1036,\n 10.2404, 10.3758, 10.5096, 10.3621, 10.4952, 10.6270, 10.7575, 10.8866,\n 10.7442, 10.6043, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.0938, 11.2169, 11.3389, 11.4599, 11.3308, 11.4512, 11.5706,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.1346, 12.2474, 12.3595, 12.4708, 12.5812, 12.4622, 12.5723, 12.6815,\n 12.5646, 12.6735, 12.5583, 12.6667, 12.7743, 12.6611, 12.7683, 12.8749,\n 12.9807, 13.0859, 13.1904, 13.2942, 13.3974, 13.4999, 13.6019, 13.7032])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nBill wriggled himself loose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.8489, 1.7408, 1.6353, 1.5323, 1.7685, 1.6667,\n 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.8791, 2.0870, 2.2916, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.1320, 2.0455, 2.2404, 2.1546, 2.3462, 2.2611, 2.4495,\n 2.3651, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 2.1918, 2.3658, 2.5378, 2.4618,\n 2.3868, 2.3126, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.5207, 2.6828, 2.6112, 2.5403, 2.4703, 2.6296, 2.5600, 2.4910,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.9863, 2.1344, 2.2813, 2.4271, 2.3660, 2.3054, 2.2454,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 2.0078, 1.9518, 1.8962, 1.8411, 1.9795, 1.9245,\n 1.8699, 2.0068, 2.1429, 2.2780, 2.2230, 2.1685, 2.1143, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.6519, 2.7783, 2.7253, 2.6726,\n 2.6203, 2.7454, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.3603, 2.3110, 2.2620, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.3098, 2.2618, 2.2141, 2.3333,\n 2.4520, 2.5700, 2.5220, 2.4744, 2.4269, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nAliza wriggled her tooth loose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "23.4%", + "z-score": "-0.391", + "p value": "0.652", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.3365, -0.3907])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe off center spinning flywheel shook itself loose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.2756, -0.1374, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "197", + "Fraction of T in Greenlist": "99.0%", + "z-score": "24.1", + "p value": "1.08e-128", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.3509,\n 6.5813, 6.8041, 7.0200, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 8.2923, 8.4697, 8.6436, 8.8141, 8.9815,\n 9.1458, 9.3074, 9.4662, 9.6225, 9.7763, 9.9278, 10.0771, 10.2242,\n 10.3692, 10.5123, 10.6535, 10.7928, 10.9304, 11.0663, 11.2006, 11.3333,\n 11.4645, 11.5943, 11.7226, 11.8495, 11.9751, 12.0994, 12.2224, 12.3443,\n 12.4649, 12.5844, 12.7028, 12.8201, 12.9364, 13.0516, 13.1658, 13.2791,\n 13.3913, 13.5027, 13.6132, 13.7227, 13.8315, 13.9393, 14.0464, 14.1526,\n 14.2581, 14.3627, 14.4667, 14.5699, 14.6723, 14.7741, 14.8751, 14.9755,\n 15.0753, 15.1743, 15.2728, 15.3706, 15.4677, 15.5643, 15.6603, 15.7557,\n 15.8505, 15.9448, 16.0385, 16.1317, 16.2243, 16.3165, 16.4081, 16.4992,\n 16.5898, 16.6799, 16.7695, 16.8586, 16.9473, 17.0355, 17.1233, 17.2106,\n 17.2975, 17.3839, 17.4700, 17.5556, 17.6407, 17.7255, 17.8099, 17.8939,\n 17.9775, 18.0607, 18.1435, 18.2259, 18.3080, 18.3897, 18.4710, 18.5520,\n 18.6327, 18.7130, 18.7929, 18.8725, 18.9518, 19.0307, 19.1094, 19.1877,\n 19.2657, 19.3433, 19.4207, 19.4977, 19.5745, 19.6509, 19.7271, 19.8030,\n 19.8785, 19.9538, 20.0288, 20.1035, 20.1780, 20.2522, 20.3261, 20.3997,\n 20.4731, 20.5462, 20.6190, 20.6916, 20.7640, 20.8361, 20.9079, 20.9795,\n 21.0509, 21.1220, 21.1929, 21.2635, 21.3339, 21.4041, 21.4740, 21.5438,\n 21.6132, 21.6825, 21.7516, 21.8204, 21.8890, 21.9574, 22.0256, 22.0936,\n 22.1614, 22.2289, 22.2963, 22.3635, 22.4304, 22.4972, 22.5637, 22.6301,\n 22.6963, 22.7622, 22.8280, 22.8936, 22.9590, 23.0243, 23.0893, 23.1542,\n 23.2189, 23.2834, 23.3477, 23.4118, 23.4758, 23.5396, 23.6032, 23.6667,\n 23.7300, 23.7931, 23.8560, 23.9188, 23.9814, 24.0439, 24.1062])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more you eat, the less you want.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "20.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "161", + "Fraction of T in Greenlist": "80.9%", + "z-score": "18.2", + "p value": "2.05e-74", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.4748, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414, 12.5592, 12.4019, 12.2474,\n 12.0957, 12.2150, 12.3333, 12.4506, 12.5669, 12.6822, 12.7965, 12.9099,\n 13.0225, 13.1341, 13.2448, 13.3547, 13.4638, 13.5721, 13.6796, 13.7862,\n 13.8922, 13.7539, 13.8597, 13.9648, 14.0691, 14.1727, 14.2756, 14.3778,\n 14.2449, 14.3470, 14.4484, 14.5492, 14.6494, 14.7489, 14.8478, 14.9461,\n 15.0437, 15.1408, 15.2374, 15.3333, 15.4287, 15.5236, 15.6179, 15.4935,\n 15.3705, 15.4651, 15.5592, 15.4384, 15.5324, 15.4133, 15.2955, 15.3898,\n 15.4835, 15.5767, 15.6694, 15.7617, 15.8534, 15.9447, 16.0355, 16.1258,\n 16.2157, 16.3051, 16.3941, 16.2816, 16.1702, 16.2594, 16.3481, 16.4364,\n 16.5243, 16.4152, 16.3071, 16.3951, 16.4827, 16.3762, 16.4636, 16.3583,\n 16.4456, 16.5325, 16.6190, 16.7052, 16.7909, 16.8763, 16.7733, 16.8585,\n 16.9434, 17.0279, 16.9265, 16.8259, 16.9105, 16.9947, 17.0785, 17.1620,\n 17.2451, 17.3279, 17.4103, 17.4924, 17.5741, 17.6556, 17.5579, 17.6392,\n 17.5426, 17.4466, 17.5280, 17.6090, 17.5142, 17.5951, 17.5011, 17.5818,\n 17.4887, 17.5693, 17.4770, 17.5575, 17.6377, 17.5464, 17.6264, 17.5359,\n 17.6158, 17.6954, 17.6058, 17.6852, 17.7643, 17.6756, 17.5875, 17.6667,\n 17.7455, 17.8241, 17.9023, 17.9803, 18.0580, 18.1355, 18.2126])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "matthews_corr_without_watermark": -0.09611547479941228, + "matthews_corr_with_watermark": 0.11224602755526586 + } + }, + "validation": { + "results": [ + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe sailors rode the breeze clear of the rocks.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.8074, 1.7488, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.4863, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.6127, 1.7454, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.7321, 1.8598, 1.8102, 1.9370, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.8453, 1.9686, 1.9206, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.9533, 2.0726, 2.0259, 2.1444, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe weights made the rope stretch over the pulley.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "38.1%", + "z-score": "4.24", + "p value": "1.13e-05", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.7963, 1.6977, 1.9215, 1.8240, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.3641, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 1.1508, 1.3093,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.6008, 1.7488, 1.8956, 1.8371,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.9237, 2.0642, 2.0078, 2.1470, 2.0907, 2.0349, 1.9795, 2.1170,\n 2.0617, 2.0068, 1.9524, 2.0881, 2.2230, 2.1685, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.1656, 2.1128, 2.2436, 2.3735,\n 2.3206, 2.4495, 2.5776, 2.7050, 2.6519, 2.7783, 2.7253, 2.6726,\n 2.6203, 2.7454, 2.6932, 2.6414, 2.5898, 2.7137, 2.8368, 2.7852,\n 2.9076, 3.0292, 3.1502, 3.0984, 3.2186, 3.3381, 3.4570, 3.4050,\n 3.5232, 3.6407, 3.7576, 3.7055, 3.8216, 3.9372, 4.0522, 4.0000,\n 4.1143, 4.2280, 4.3412, 4.2889, 4.2369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe mechanical doll wriggled itself loose.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "14.3%", + "z-score": "-1.96", + "p value": "0.975", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -1.8481, -1.9064, -1.9640])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 9.9540, 9.8072, 9.9454, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.6890, 11.5630, 11.6809, 11.5570, 11.6743, 11.5525, 11.6693, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.5646, 12.4491, 12.5583, 12.6667, 12.5531, 12.4409, 12.5491, 12.4384,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 12.8599,\n 12.9641, 12.8586, 12.7542, 12.8582, 12.9616, 12.8586, 12.9616, 12.8598,\n 12.9624, 13.0643, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 13.9531, 14.0489,\n 14.1442, 14.2390, 14.3333, 14.2374, 14.1422, 14.0479, 13.9543, 13.8615,\n 13.9561, 14.0503, 13.9585, 14.0524, 13.9615, 14.0550, 14.1481, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.7885,\n 14.8779, 14.9669, 15.0555, 14.9677, 15.0560, 15.1440, 15.0570, 14.9707,\n 15.0585, 14.9729, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.3247, 15.4103, 15.3272, 15.2446, 15.3301, 15.4152, 15.3333,\n 15.4182, 15.3370, 15.4217, 15.5060, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nIf you had eaten more, you would want less.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "137", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "25.5%", + "z-score": "0.148", + "p value": "0.441", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.5832, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "154", + "Fraction of T in Greenlist": "77.4%", + "z-score": "17.1", + "p value": "1.31e-65", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.3557, 8.5206, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 8.9567, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.0701,\n 10.2036, 10.0673, 10.2000, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.5838, 10.4565, 10.3310, 10.4579, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.6632, 11.5470, 11.6620, 11.5476, 11.6620, 11.5492,\n 11.6631, 11.7762, 11.6652, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 12.8586, 12.9624, 13.0656, 13.1681, 13.0644, 13.1665, 13.2681,\n 13.1657, 13.2669, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.2481, 14.3434, 14.2443, 14.3393, 14.4338,\n 14.5277, 14.6212, 14.7143, 14.6170, 14.7098, 14.6135, 14.7060, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.9769, 14.8831, 14.9737, 15.0639, 15.1537,\n 15.2430, 15.3320, 15.2397, 15.3284, 15.2369, 15.3254, 15.4135, 15.3230,\n 15.4108, 15.3211, 15.4087, 15.4959, 15.4072, 15.4942, 15.5808, 15.6670,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.3441,\n 16.2574, 16.3407, 16.4236, 16.3377, 16.4205, 16.5028, 16.4178, 16.5000,\n 16.5819, 16.6634, 16.7447, 16.8256, 16.9063, 16.9866, 17.0667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nAs you eat the most, you want the least.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "89.9%", + "z-score": "12.4", + "p value": "7.79e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.4748, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more you would want, the less you would eat.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.4936, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.0698,\n 9.9433, 9.8187, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.3189, 11.2069, 11.0963,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.5460, 12.4434, 12.3419, 12.2414, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.0732, 13.1730, 13.2722, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.3810, 13.2882, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.0582,\n 14.1510, 14.2433, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.4321,\n 14.3449, 14.2584, 14.1725, 14.2633, 14.3537, 14.4437, 14.5333, 14.6225,\n 14.7113, 14.7998, 14.8878, 14.9755, 15.0629, 14.9786, 15.0657, 15.1524,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.4126, 15.3301, 15.2481, 15.1667,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI demand that the more John eat, the more he pays.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 6.8620, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 8.9677, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.2091, 9.3638, 9.5163, 9.6667,\n 9.8150, 9.9613, 10.1057, 10.2482, 10.3890, 10.5280, 10.3540, 10.1840,\n 10.3237, 10.4618, 10.5982, 10.4350, 10.5709, 10.7052, 10.8382, 10.9697,\n 11.0998, 11.2286, 11.3561, 11.4823, 11.6073, 11.7311, 11.5797, 11.4310,\n 11.2848, 11.4097, 11.5333, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 11.9961, 11.8589, 11.9774, 12.0949, 12.2114, 12.3269, 12.4416, 12.5553,\n 12.6682, 12.7802, 12.8913, 12.7609, 12.6322, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.6211, 13.7230, 13.8244, 13.9251, 14.0253, 14.1248,\n 14.2238, 14.3222, 14.2118, 14.1025, 14.2009, 14.2988, 14.3961, 14.4928,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 14.8629, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.3158, 15.4072, 15.4980, 15.5885,\n 15.4867, 15.3858, 15.4762, 15.3764, 15.2774, 15.3678, 15.4578, 15.5473,\n 15.6365, 15.5391, 15.4425, 15.3467, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.7823, 15.8694, 15.9561, 16.0424, 15.9496, 15.8575,\n 15.9437, 15.8525, 15.7619, 15.8481, 15.9339, 16.0194, 16.1045, 16.0151,\n 15.9264, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.3441,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.4205, 16.3353, 16.4178, 16.3333,\n 16.2494, 16.3318, 16.4139, 16.4957, 16.5772, 16.4943, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary listens to the Grateful Dead, she gets depressed.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.69", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -2.8868,\n -2.7329, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.6640, -2.7014, -2.5560,\n -2.5936, -2.6309, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "7", + "Fraction of T in Greenlist": "87.5%", + "z-score": "4.08", + "p value": "2.23e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe angrier Mary got, the more she looked at pictures.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.5785, 1.7889, 1.7002, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.2722, -0.1357, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.2158, 0.3443, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.5803, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe higher the stakes, the lower his expectations are.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.5601, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.2817, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.2435, -1.2817, -1.3197, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "75.0%", + "z-score": "4", + "p value": "3.17e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 4.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more Fred is obnoxious, the less attention you should pay to him.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.4142, -1.4791, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.6231, -1.6641, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.5406, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 9.8020, 9.9392, 10.0750, 9.9352, 10.0701,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.1983, 10.0673, 10.1982, 10.3280,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.7084, 10.8328, 10.7084, 10.8321,\n 10.7098, 10.8327, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.1966, 11.3143, 11.1990, 11.0851, 11.2025, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.6894, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.2414, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.4834, 12.5853, 12.6867, 12.7875, 12.6939, 12.7943, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 13.0226, 12.9337, 13.0311, 13.1279, 13.2243, 13.1364, 13.2324, 13.3279,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.4390, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.1906, 14.2805, 14.1974, 14.2870, 14.2046, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.4850, 14.4046, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.5948, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn was lots more obnoxious than Fred.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-0.8", + "p value": "0.788", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.6470, 9.7738, 9.8995,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.5769, 10.4829, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.9337, 12.8456, 12.7581, 12.8556, 12.7690, 12.8661, 12.9628,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.2593, 13.1745, 13.2690, 13.1849,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.3002, 13.2182, 13.1367, 13.2299,\n 13.1491, 13.0688, 13.1617, 13.2542, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.2012, 13.1233, 13.2149, 13.3060, 13.2288, 13.3196, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more people you give beer to, the more people get sick.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "21.4%", + "z-score": "-1.05", + "p value": "0.854", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.5907, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.0401, 6.9294, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.0711,\n 7.2104, 7.1152, 7.2532, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 7.9460, 7.8558,\n 7.7667, 7.6785, 7.5912, 7.7192, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 11.8638, 11.9586, 11.8849, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more does Bill smoke, the more Susan hates him.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "200", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.5%", + "z-score": "-0.163", + "p value": "0.565", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.2819, -2.3333, -2.3842, -2.1712, -2.2226, -2.0137, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.1094, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -0.8513,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.3299, -0.3702, -0.4103, -0.2865, -0.1633])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe more pictures of him that appear in the news, the more embarrassed John becomes.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "5.5%", + "z-score": "-3.85", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -2.6349, -2.6943, -2.7526, -2.8098, -2.8660, -2.9212,\n -2.9755, -3.0290, -3.0816, -3.1334, -3.1845, -3.2348, -3.2844, -3.3333,\n -3.3816, -3.4293, -3.4763, -3.5228, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -3.7685, -3.8103,\n -3.8517])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 7.8320, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.6702, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.1333, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.7065, 9.5876, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.2833, 10.4042, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.7211, 10.8353, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.2503, 12.3508, 12.2628, 12.3629,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.4870, 12.5852, 12.4998, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.2288, 13.3196, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nEvery senator seems to become more corrupt, as he talks to more lobbyists.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "6.2%", + "z-score": "-1.73", + "p value": "0.958", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.5406, 9.6838, 9.8254, 9.9653, 10.1036,\n 9.9540, 10.0915, 9.9454, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 10.9777, 11.1026,\n 11.2263, 11.0938, 10.9634, 11.0870, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.1994, 11.0762, 11.1967, 11.3163, 11.4349, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.5476, 11.6632, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.2248, 12.3350, 12.4444, 12.5531, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.7597, 12.8653, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.0677, 12.9624, 13.0656, 13.1681, 13.0644, 13.1665, 13.2681,\n 13.1657, 13.2669, 13.1657, 13.2665, 13.1665, 13.0674, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.4674, 13.5654, 13.4691, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 13.8615,\n 13.9561, 13.8642, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.4248, 14.5161, 14.6070, 14.6976, 14.6084, 14.6986, 14.7885,\n 14.8779, 14.9669, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.4057, 15.4922, 15.5783, 15.4919, 15.5778, 15.6634, 15.7485, 15.8334,\n 15.9179, 16.0020, 15.9170, 16.0009, 16.0845, 16.1678, 16.0836, 16.1667,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWho does John visit Sally because he likes?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.3464, 0.6794, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.5185, 0.4082, 0.7035, 0.9901, 0.8783, 0.7698,\n 0.6644, 0.5620, 0.4623, 0.3651, 0.2705, 0.5345, 0.4402, 0.3482, 0.2582,\n 0.5108, 0.4211, 0.3333, 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714,\n 0.7006, 0.6172, 0.8412, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456,\n 1.1547, 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.5492, 1.7321,\n 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.7233, 1.6524, 1.5823,\n 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142, 1.3483, 1.2831, 1.4506,\n 1.3856, 1.5511, 1.7150, 1.8773, 1.8116, 1.7467, 1.6823, 1.8419, 2.0000,\n 1.9355, 2.0918, 2.2468, 2.1822, 2.1182, 2.0548, 1.9920, 1.9298, 1.8682,\n 2.0197, 1.9582, 1.8974, 1.8370, 1.7772, 1.7179, 1.6591, 1.6008, 1.5430,\n 1.4857, 1.4289, 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423,\n 1.1882, 1.1345, 1.2778, 1.2243, 1.3663, 1.3128, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366, 0.8868,\n 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303, 0.6825, 0.8165,\n 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127, 0.6662, 0.6199, 0.5740,\n 0.5283, 0.4828, 0.6128, 0.5674, 0.5222, 0.4774, 0.4327, 0.3884, 0.5164,\n 0.4721, 0.4280, 0.3841, 0.3405, 0.2971, 0.4233, 0.3800, 0.3369, 0.2940,\n 0.2513, 0.2089, 0.3333, 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.2462,\n 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 6.7778, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.9282, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.4857, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.4936, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.6620, 11.7762, 11.8896, 11.7757,\n 11.8885, 12.0005, 11.8885, 12.0000, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.1117, 12.0044, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.6592, 12.5604, 12.6635, 12.7660, 12.8679, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 14.0488,\n 14.1428, 14.2364, 14.3295, 14.4222, 14.5144, 14.6062, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.6070, 14.6976, 14.7877, 14.8773, 14.7885,\n 14.7002, 14.6126, 14.5257, 14.6155, 14.7049, 14.7939, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.0629, 15.1498, 15.2364, 15.1524,\n 15.2387, 15.1553, 15.2414, 15.3272, 15.4126, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMarianne did not leave.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.3496, 10.2554, 10.1621, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.1810, 11.0952, 11.0102, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.5655, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.0355, 11.9558, 11.8766, 11.9741, 12.0712, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 12.9087, 12.8333,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHe could not] have been working.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547, 1.5403, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 1.1323, 1.4444,\n 1.7457, 1.6082, 1.8974, 1.7628, 2.0412, 2.3116, 2.1783, 2.0494, 1.9245,\n 1.8034, 1.6859, 1.9415, 2.1909, 2.4345, 2.3163, 2.2011, 2.0889, 2.3238,\n 2.5538, 2.4422, 2.3333, 2.2269, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570,\n 2.2576, 2.1602, 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094,\n 2.3094, 2.5064, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.5504, 2.7333, 2.6491, 2.5660, 2.7456, 2.6632, 2.8402, 3.0151,\n 2.9329, 2.8518, 2.7717, 2.9433, 3.1129, 3.0330, 2.9542, 2.8764, 2.7995,\n 2.9656, 2.8893, 2.8138, 2.9775, 2.9025, 3.0641, 3.2242, 3.1493, 3.0754,\n 3.0022, 3.1597, 3.3156, 3.2426, 3.1704, 3.0989, 3.0282, 2.9582, 2.8889,\n 3.0415, 3.1928, 3.1236, 3.2733, 3.2044, 3.3526, 3.4995, 3.4308, 3.3627,\n 3.2953, 3.2285, 3.1623, 3.0967, 3.0317, 2.9673, 2.9035, 3.0467, 3.1889,\n 3.1251, 3.0619, 2.9991, 3.1395, 3.2788, 3.2161, 3.1539, 3.0923, 3.0311,\n 2.9704, 2.9103, 2.8505, 2.9872, 3.1229, 3.0632, 3.0039, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.0952, 3.0373, 2.9798, 2.9227, 3.0540, 3.1844, 3.1273,\n 3.0706, 3.0143, 3.1433, 3.2715, 3.2152, 3.1593, 3.1038, 3.0486, 2.9938,\n 3.1203, 3.2460, 3.3710, 3.3160, 3.2614, 3.2071, 3.3309, 3.4539, 3.3996,\n 3.3457, 3.2921, 3.2389, 3.1860, 3.3075, 3.4283, 3.3754, 3.4954, 3.4427,\n 3.5619, 3.6805, 3.6277, 3.5753, 3.5232, 3.6407, 3.7576, 3.7055, 3.6537,\n 3.7697, 3.7180, 3.8333, 3.9481, 3.8964, 4.0105, 3.9590, 4.0723, 4.1851,\n 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 8.8252, 8.6549, 8.8121, 8.6469, 8.8029, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.8150,\n 9.9540, 9.8072, 9.6632, 9.8020, 9.9392, 10.0750, 9.9352, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.4581, 10.5862,\n 10.4565, 10.5838, 10.7099, 10.5830, 10.7084, 10.8328, 10.7084, 10.5859,\n 10.4650, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 11.8982, 11.7932, 11.9029, 12.0118, 11.9083, 11.8058,\n 11.9144, 12.0223, 11.9213, 12.0286, 12.1353, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.7704, 12.6739,\n 12.5782, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.3960, 13.4920, 13.4021, 13.4977, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.6990, 13.7926, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 13.9896, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.2805, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.3087, 14.3970, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHe can not have been working.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.9333, 1.8543, 1.7765, 1.6997, 1.8838, 1.8074,\n 1.9887, 1.9127, 2.0913, 2.0158, 1.9413, 1.8677, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 2.0517, 1.9829, 2.1470, 2.0785, 2.0107, 2.1723, 2.3324, 2.4910,\n 2.4228, 2.5796, 2.5117, 2.4444, 2.3779, 2.5322, 2.4660, 2.6186,\n 2.5527, 2.7037, 2.6381, 2.5731, 2.5087, 2.4449, 2.5934, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.3660, 2.5103, 2.6536,\n 2.5925, 2.7344, 2.8753, 2.8141, 2.9537, 3.0923, 3.2299, 3.1685,\n 3.3049, 3.2437, 3.1831, 3.1229, 3.2577, 3.1977, 3.3314, 3.2717,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.1690, 3.2998, 3.2419, 3.1844,\n 3.1273, 3.0706, 3.0143, 2.9584, 3.0872, 3.2152, 3.3424, 3.2863,\n 3.4126, 3.3567, 3.4821, 3.6067, 3.7306, 3.8538, 3.7975, 3.9198,\n 3.8638, 3.8081, 3.7528, 3.8740, 3.8189, 3.9392, 3.8843, 4.0038,\n 3.9491, 3.8947, 3.8406, 3.7869, 3.9052, 3.8516, 3.7984, 3.7455,\n 3.6929, 3.6407, 3.5887, 3.7055, 3.8216, 3.9372, 3.8851, 4.0000,\n 3.9481, 3.8964, 4.0105, 4.1239, 4.2369, 4.1851, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.4550, 6.2993, 6.1477, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.2178, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.9586, 8.8602, 8.7629, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.2194, 11.1352, 11.0517,\n 10.9689, 11.0728, 10.9906, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.1588, 11.2602, 11.1807, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.9176,\n 11.8429, 11.7687, 11.6949, 11.6217, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nYou will believe Bob.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.8173, 1.9829, 1.9149, 1.8475, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.9066, 2.0651, 2.0000, 1.9355, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.9920, 2.1442, 2.0817, 2.0197, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 2.0739, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.1858, 2.1268, 2.0682, 2.2111, 2.1527, 2.0948, 2.0373, 2.1783,\n 2.1210, 2.0642, 2.0078, 1.9518, 2.0907, 2.0349, 2.1726, 2.1170,\n 2.0617, 2.1980, 2.3333, 2.2780, 2.2230, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.3500, 2.2966, 2.2436, 2.3735,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.5166, 2.4653, 2.4142, 2.5386, 2.6623, 2.6112,\n 2.5604, 2.6830, 2.6323, 2.5820, 2.5319, 2.4822, 2.6034, 2.5538,\n 2.6742, 2.6247, 2.5754, 2.6949, 2.8137, 2.7644, 2.7154, 2.8333,\n 2.7844, 2.7358, 2.6874, 2.6393, 2.7560, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn has not kissed Mary.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.8682, 0.8248, 0.7816, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI said that never in my life had I seen a place like Bangor.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 1.1239,\n 1.0735, 1.0235, 1.1593, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.3443, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "7", + "Fraction of T in Greenlist": "58.3%", + "z-score": "2.67", + "p value": "0.00383", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMickey looked up it.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547, 0.9802, 1.3608,\n 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 1.9245,\n 1.8034, 2.0605, 2.3113, 2.1909, 2.4345, 2.6726, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.3333, 2.5568, 2.4495, 2.6679, 2.8823, 2.7757, 2.6713,\n 2.5690, 2.4689, 2.3706, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823,\n 2.8868, 2.7928, 2.7005, 2.6098, 2.8006, 2.9887, 2.8983, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 3.0071, 3.1829, 3.0984, 3.2717,\n 3.4429, 3.3587, 3.2757, 3.1937, 3.1129, 3.0330, 3.2004, 3.3659, 3.2863,\n 3.4498, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998, 3.2242, 3.3826, 3.5396,\n 3.4641, 3.6193, 3.7730, 3.6977, 3.6233, 3.5496, 3.4768, 3.4047, 3.5556,\n 3.7051, 3.6332, 3.7812, 3.9279, 3.8562, 3.7852, 3.7148, 3.6452, 3.5762,\n 3.7205, 3.8636, 3.7947, 3.9365, 4.0771, 4.0085, 3.9404, 3.8730, 3.8061,\n 3.7399, 3.8784, 4.0158, 3.9497, 4.0859, 4.2212, 4.1552, 4.0898, 4.0249,\n 3.9606, 3.8968, 4.0301, 4.1625, 4.0988, 4.2301, 4.3605, 4.2970, 4.2339,\n 4.1713, 4.1092, 4.0476, 4.1763, 4.3042, 4.2426, 4.3695, 4.4956, 4.4342,\n 4.3733, 4.3128, 4.2527, 4.1931, 4.3176, 4.4413, 4.3818, 4.5047, 4.6268,\n 4.5674, 4.5083, 4.4497, 4.3915, 4.3336, 4.4544, 4.5744, 4.5166, 4.6359,\n 4.7544, 4.6968, 4.6395, 4.5826, 4.5260, 4.4698, 4.5871, 4.7037, 4.6476,\n 4.7635, 4.8787, 4.8227, 4.7670, 4.7117, 4.6567, 4.6020, 4.7161, 4.8295,\n 4.7749, 4.8877, 5.0000, 4.9455, 4.8913, 4.8374, 4.7838, 4.7305, 4.8416,\n 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThere tended to be a lot of discussion.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.5909, 10.7066, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.0883, 11.1991, 11.3091, 11.2127, 11.1172, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.0611, 12.1622, 12.2628, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.5852, 12.4998, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 13.1246, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.4263, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.7559, 13.8447, 13.9332, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn tried to be a good boy.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -1.7086, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.3918, -3.4308, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.6216, -3.6590, -3.6961, -3.7330, -3.7697, -3.8061, -3.8424, -3.8784,\n -3.9141, -3.9497, -3.9850, -4.0202, -4.0551, -4.0898, -4.1243, -4.1586,\n -3.9954, -4.0301, -3.8686, -3.9036, -3.9384, -3.9729, -3.8142, -3.8490,\n -3.8837, -3.7270, -3.5714, -3.6068, -3.6420, -3.6770, -3.5238, -3.5590,\n -3.4073, -3.4428, -3.4780, -3.5131, -3.3637, -3.3989, -3.4340, -3.2863,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.1013, -3.1368, -2.9933, -3.0290,\n -3.0644, -3.0997, -2.9581, -2.9935, -3.0288, -2.8887, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.7187, -2.7541, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.5893, -2.6247, -2.6599, -2.5265, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.3688, -2.4042, -2.2740, -2.3094, -2.3447, -2.3798, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 5.0186, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.0474, 5.9491, 5.8522, 5.7566, 5.6622,\n 5.8139, 5.9641, 6.1128, 6.0193, 6.1664, 6.3122, 6.2197, 6.1283,\n 6.0380, 5.9488, 5.8606, 5.7735, 5.9172, 6.0596, 5.9732, 6.1143,\n 6.2541, 6.3928, 6.5303, 6.6667, 6.5807, 6.7159, 6.8500, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.5607, 8.6783, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.3993, 9.3181, 9.2376,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.1785,\n 9.1018, 9.0257, 8.9502, 9.0601, 9.1694, 9.0944, 9.2032, 9.1287,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 9.8918, 9.8198, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.9132, 10.0143, 10.1149, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.9299, 10.8602, 10.7910, 10.7222, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn is eager.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.4816, 3.7009, 3.9158, 4.1265, 4.0000,\n 3.8765, 3.7559, 3.9614, 3.8431, 3.7273, 3.6141, 3.8146, 3.7033,\n 3.9001, 4.0937, 4.2844, 4.1740, 4.0657, 3.9595, 4.1461, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.3788, 4.2784, 4.4537, 4.3546,\n 4.5274, 4.6981, 4.8667, 4.7683, 4.6715, 4.5760, 4.7419, 4.6476,\n 4.8113, 4.9731, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 4.9237,\n 5.0795, 5.2338, 5.3865, 5.2970, 5.2086, 5.1212, 5.2719, 5.1855,\n 5.3345, 5.4822, 5.6285, 5.5426, 5.4576, 5.3736, 5.5181, 5.4349,\n 5.5780, 5.7199, 5.8605, 5.7778, 5.6959, 5.6149, 5.7540, 5.6737,\n 5.5942, 5.5155, 5.6530, 5.5750, 5.4977, 5.4212, 5.5572, 5.4813,\n 5.6160, 5.7498, 5.8825, 5.8069, 5.7320, 5.6578, 5.7892, 5.7155,\n 5.8458, 5.9752, 6.1036, 6.0302, 5.9575, 5.8853, 6.0125, 5.9409,\n 6.0671, 6.1926, 6.3172, 6.2458, 6.1750, 6.1047, 6.2282, 6.1584,\n 6.2810, 6.4028, 6.5238, 6.4543, 6.3853, 6.3168, 6.4368, 6.3688,\n 6.4880, 6.6064, 6.7242, 6.6564, 6.5891, 6.5223, 6.6391, 6.5727,\n 6.6887, 6.8041, 6.9189, 6.8527, 6.7869, 6.7217, 6.8355, 6.7706,\n 6.8838, 6.9964, 7.1083, 7.0436, 6.9793, 6.9155, 7.0266, 6.9631,\n 7.0736, 7.1835, 7.2929, 7.2296, 7.1667, 7.1041, 7.2127, 7.1506,\n 7.2585, 7.3660, 7.4729, 7.4109, 7.3493, 7.2881, 7.3943, 7.3333,\n 7.4390, 7.5441, 7.6488, 7.5880, 7.5276, 7.4676, 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.3283, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.0947, 9.2387, 9.3811, 9.2418, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.3287, 10.2030, 10.0791, 10.2075, 10.3347, 10.4608, 10.3397,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.2187, 11.3333, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.6829, 11.7932, 11.9029, 12.0118, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 12.8679, 12.7704, 12.6739,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.2433, 14.1543, 14.2464, 14.3380, 14.2499, 14.3412, 14.2539,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 14.9755, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.2387, 15.3247, 15.4103, 15.4956, 15.5805, 15.6651, 15.7494, 15.6667,\n 15.5845, 15.5028, 15.4217, 15.5060, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe want John to win.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.1968, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.4271, 5.2085, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.3831, 9.2469, 9.1130, 8.9815,\n 8.8522, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.7429, 10.8616, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.5909, 10.7066, 10.6061, 10.5067, 10.6218, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.1435, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.9116, 12.8285,\n 12.7461, 12.6643, 12.5831, 12.5024, 12.4223, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.5495, 12.4713, 12.5657, 12.6597, 12.5820, 12.6757, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.8007, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe box contained the ball from the tree.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "8", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "62.5%", + "z-score": "2.45", + "p value": "0.00715", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe tube was escaped by gas.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.9631, 0.8980, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWater bubbled up out of the kettle.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.358", + "p value": "0.64", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.2887,\n -0.3581])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.0342, 9.9524, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.6929, 10.7959, 10.7175, 10.6397, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.7169, 11.8117, 11.9060, 12.0000,\n 11.9273, 11.8551, 11.9487, 12.0419, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe tub leaked water.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "36.9%", + "z-score": "3.47", + "p value": "0.000261", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 0.9802, 1.3608,\n 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 2.1776, 2.0412, 1.9096, 1.7823, 2.0494, 1.9245,\n 1.8034, 2.0605, 2.3113, 2.1909, 2.4345, 2.3163, 2.2011, 2.0889, 2.3238,\n 2.2133, 2.1054, 2.3333, 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.5690, 2.4689, 2.3706, 2.5775, 2.7811, 2.6833, 2.8830, 2.7863, 2.6914,\n 2.5981, 2.7928, 2.7005, 2.6098, 2.8006, 2.9887, 2.8983, 3.0833, 2.9938,\n 2.9057, 2.8189, 3.0000, 2.9140, 2.8292, 3.0071, 3.1829, 3.0984, 3.2717,\n 3.1879, 3.1052, 3.0237, 3.1937, 3.1129, 3.0330, 3.2004, 3.3659, 3.2863,\n 3.4498, 3.3708, 3.2928, 3.2157, 3.3764, 3.2998, 3.2242, 3.3826, 3.5396,\n 3.4641, 3.6193, 3.7730, 3.6977, 3.6233, 3.5496, 3.4768, 3.4047, 3.3333,\n 3.2627, 3.4130, 3.3428, 3.4915, 3.4217, 3.3526, 3.4995, 3.4308, 3.3627,\n 3.5079, 3.6519, 3.5839, 3.5166, 3.4499, 3.3838, 3.3182, 3.4599, 3.3947,\n 3.3301, 3.2660, 3.2025, 3.3420, 3.4806, 3.4171, 3.5544, 3.4913, 3.4286,\n 3.3665, 3.3049, 3.4403, 3.5748, 3.5132, 3.4521, 3.3915, 3.3314, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.1690, 3.1113, 3.2419, 3.3717, 3.3140,\n 3.4428, 3.3853, 3.3282, 3.2715, 3.2152, 3.3424, 3.4689])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhat the water did to the bottle was fill it.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "81", + "Fraction of T in Greenlist": "40.7%", + "z-score": "5.12", + "p value": "1.56e-07", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.3027, 4.1111,\n 4.3644, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 4.9507, 4.7819, 5.0037,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.5234, 5.3708, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460, 7.8905,\n 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240, 8.2619, 8.1550,\n 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853, 8.6173, 8.5149, 8.4138,\n 8.5448, 8.4449, 8.5749, 8.4763, 8.3789, 8.2825, 8.1873, 8.0931, 8.0000,\n 7.9079, 8.0370, 7.9460, 8.0741, 7.9839, 7.8948, 7.8065, 7.7192, 7.6328,\n 7.5472, 7.6742, 7.5895, 7.7155, 7.8406, 7.7566, 7.6734, 7.5910, 7.5094,\n 7.6335, 7.5526, 7.4724, 7.3930, 7.3143, 7.4373, 7.3592, 7.2818, 7.2051,\n 7.3271, 7.2510, 7.1755, 7.1007, 7.2217, 7.1474, 7.0737, 7.0007, 6.9282,\n 6.8563, 6.7850, 6.7143, 6.6441, 6.5745, 6.5054, 6.4368, 6.3688, 6.3013,\n 6.2342, 6.1677, 6.1017, 6.0362, 5.9711, 5.9065, 5.8424, 5.7787, 5.7155,\n 5.6527, 5.5904, 5.5284, 5.4670, 5.4059, 5.3452, 5.2850, 5.2251, 5.1657,\n 5.1066, 5.0479, 5.1647, 5.1063, 5.0483, 5.1642, 5.1064, 5.0489, 4.9918,\n 4.9351, 4.8787, 4.8227, 4.9373, 4.8815, 4.8260, 4.7709, 4.8845, 4.9975,\n 5.1100, 5.0548, 5.0000, 5.1117, 5.0571, 5.0027, 4.9487, 4.8950, 5.0057,\n 5.1159])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 8.9618, 9.1051, 8.9709, 9.1130, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.4770, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.5556, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.8982, 12.0077, 12.1164, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.2360, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 12.9728, 13.0732, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.7694, 13.8642, 13.9585, 14.0524, 14.1458, 14.0550, 14.1481, 14.2408,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.6103,\n 14.7002, 14.6126, 14.5257, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 15.0585, 15.1460, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.3226,\n 15.4085, 15.3247, 15.4103, 15.4956, 15.5805, 15.6651, 15.5823, 15.6667,\n 15.7507, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhat the water did to the whole bottle was fill it.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -0.8660,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.3849,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.0419, 8.9469,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.0504, 9.9625, 9.8753, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.3923,\n 10.3085, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.2816, 11.2028, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.0327, 11.9586, 11.8849, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe tank leaked the fluid free.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774, 0.4201, 0.2722,\n 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714, 0.3464, 0.2265, 0.1111,\n 0.0000, 0.3216, 0.6325, 0.5185, 0.4082, 0.7035, 0.5941, 0.4880, 0.3849,\n 0.6644, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.3482, 0.6025,\n 0.8513, 0.7579, 0.6667, 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857,\n 0.7006, 0.6172, 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.7332, 0.6547,\n 0.5774, 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.6537, 0.5846, 0.5164, 0.4491,\n 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924, 0.6732, 0.6086,\n 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071, 0.6448, 0.5832, 0.5222,\n 0.4619, 0.6319, 0.8003, 0.7395, 0.6794, 0.6198, 0.5608, 0.5023, 0.4444,\n 0.6083, 0.7707, 0.7124, 0.6547, 0.5974, 0.5407, 0.4845, 0.4288, 0.5871,\n 0.7441, 0.6880, 0.6325, 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.7201,\n 0.6660, 0.6124, 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.8978, 0.8447,\n 0.7921, 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.7095, 0.8485, 0.7987, 0.7493, 0.7001,\n 0.6513, 0.6029, 0.5547, 0.6912, 0.8268, 0.7784, 0.7303, 0.6825, 0.6351,\n 0.5879, 0.5410, 0.6742, 0.8066, 0.7595, 0.7127, 0.8438, 0.7971, 0.7506,\n 0.7044, 0.8340, 0.9629, 0.9165, 0.8704, 0.8245, 0.7789, 0.7336, 0.6885,\n 0.8154, 0.9415, 0.8963, 0.8513, 0.8065, 0.7620, 0.7177, 0.6737, 0.7979,\n 0.9215, 0.8773, 0.8333, 0.9558, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668,\n 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn lay the ball in the box.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.7376, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.6230, -1.4832, -1.5236, -1.5637, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "155", + "Fraction of T in Greenlist": "77.9%", + "z-score": "17.2", + "p value": "7.86e-67", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.4983, 7.6800, 7.4885, 7.6681,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.0017, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.6743, 9.5263,\n 9.6676, 9.8072, 9.9454, 9.8020, 9.9392, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.2000, 10.3314, 10.4614, 10.5903, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.3196, 11.4388, 11.3163, 11.4349, 11.5525, 11.6693, 11.5494,\n 11.6656, 11.5476, 11.4311, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.2816,\n 13.3840, 13.4859, 13.5871, 13.4804, 13.5813, 13.4758, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.7710, 13.8695, 13.9675, 14.0649, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.4433, 14.5379, 14.4381, 14.5324, 14.6262,\n 14.7195, 14.6212, 14.7143, 14.6170, 14.7098, 14.8021, 14.8940, 14.7981,\n 14.8896, 14.7947, 14.7005, 14.7920, 14.8831, 14.9737, 15.0639, 15.1537,\n 15.0610, 15.1505, 15.2397, 15.3284, 15.4167, 15.5046, 15.5922, 15.6793,\n 15.7661, 15.8525, 15.9385, 16.0242, 16.1095, 16.1945, 16.2791, 16.1892,\n 16.2736, 16.3577, 16.4414, 16.3526, 16.4361, 16.5193, 16.4314, 16.5144,\n 16.5970, 16.6793, 16.5925, 16.6746, 16.7564, 16.8379, 16.7520, 16.8333,\n 16.9143, 16.9950, 17.0754, 17.1556, 17.2354, 17.1507, 17.2304])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn owns the book.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428, 0.8083, 0.6794, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.5185, 0.8165, 0.7035, 0.5941, 0.4880, 0.7698,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.9918, 1.2472, 1.1446, 1.0445, 0.9467,\n 1.1918, 1.4317, 1.3333, 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857,\n 0.7006, 0.6172, 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.3198, 1.2366,\n 1.1547, 1.3606, 1.5635, 1.4812, 1.6803, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.9333, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492, 1.7321,\n 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771, 1.6524, 1.8257,\n 1.7552, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856, 1.8173, 1.9829, 2.1470,\n 2.0785, 2.2405, 2.1723, 2.1049, 2.0381, 2.1974, 2.1309, 2.0651, 2.2222,\n 2.1567, 2.3120, 2.2468, 2.1822, 2.3354, 2.4874, 2.4227, 2.5731, 2.5087,\n 2.4449, 2.5934, 2.5298, 2.4669, 2.6135, 2.7591, 2.6961, 2.6336, 2.7775,\n 2.7153, 2.6536, 2.7958, 2.7344, 2.6735, 2.6131, 2.5532, 2.6933, 2.6336,\n 2.5744, 2.7129, 2.8505, 2.7913, 2.9277, 2.8687, 2.8101, 2.9451, 2.8868,\n 2.8288, 2.9625, 3.0952, 3.0373, 2.9798, 2.9227, 2.8660, 2.8098, 2.9406,\n 2.8845, 2.8288, 2.7735, 2.7186, 2.8478, 2.7930, 2.7386, 2.8666, 2.9938,\n 2.9394, 3.0657, 3.0114, 2.9575, 3.0827, 3.0290, 2.9756, 3.0997, 3.2230,\n 3.1696, 3.1166, 3.0638, 3.0114, 2.9593, 3.0811, 3.0292, 3.1502, 3.0984,\n 3.2186, 3.3381, 3.2863, 3.2348, 3.1836, 3.1327, 3.0821, 3.0317, 2.9817,\n 3.0995, 3.2167, 3.1667, 3.1169, 3.2332, 3.1836, 3.1342, 3.2496, 3.3645,\n 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe persuaded Mary to leave and Sue to stay.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "45.2%", + "z-score": "6.59", + "p value": "2.21e-11", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.3408, 9.2480, 9.1561, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.6581, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.5607, 8.4788, 8.3977, 8.3172,\n 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.0427, 7.9663, 7.8905,\n 7.8153, 7.7407, 7.6667, 7.5933, 7.7096, 7.6368, 7.5644, 7.4927,\n 7.4215, 7.3508, 7.4662, 7.3960, 7.3263, 7.2572, 7.3717, 7.3030,\n 7.2348, 7.1670, 7.2807, 7.3937, 7.3263, 7.2594, 7.1929, 7.1270,\n 7.2391, 7.3506, 7.2849, 7.2197, 7.1549, 7.0905, 7.0266, 6.9631,\n 6.9000, 6.8373, 6.7751, 6.7132, 6.8233, 6.9330, 7.0420, 6.9803,\n 6.9190, 6.8580, 6.7974, 6.7372, 6.8454, 6.7854, 6.7259, 6.6667,\n 6.6078, 6.7151, 6.6565, 6.5983, 6.5404, 6.6469, 6.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMost people probably consider, even though the courts didn't actually find, Klaus guilty of murder.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.6783, -0.7213, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "42", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "69.0%", + "z-score": "6.59", + "p value": "2.16e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855, 5.4271, 5.6614, 5.8889,\n 6.1101, 6.3255, 6.1137, 5.9106, 6.1237, 6.3317, 6.1389, 5.9530, 6.1584,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary beautifully plays the violin.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "33.3%", + "z-score": "1.53", + "p value": "0.0633", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.5275])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nClearly, John probably will immediately learn French perfectly.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "32.0%", + "z-score": "1.81", + "p value": "0.0354", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.4446, 1.6164, 1.5483, 1.7178, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.6186, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.7465, 1.6865,\n 1.8370, 1.7772, 1.9261, 1.8665, 1.8074])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "156", + "Fraction of T in Greenlist": "78.4%", + "z-score": "17.4", + "p value": "4.57e-68", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.8926, 9.0520, 9.2091, 9.3638, 9.5163, 9.6667,\n 9.8150, 9.9613, 10.1057, 10.2482, 10.3890, 10.5280, 10.6654, 10.8012,\n 10.9355, 11.0682, 11.1995, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414, 12.5592, 12.6760, 12.7918,\n 12.9066, 13.0204, 13.1333, 13.2453, 13.0932, 12.9437, 13.0564, 13.1681,\n 13.2791, 13.3891, 13.4983, 13.6067, 13.7143, 13.8211, 13.9271, 13.7862,\n 13.8922, 13.9974, 14.1018, 14.2055, 14.3086, 14.4109, 14.5125, 14.3778,\n 14.4794, 14.3470, 14.2163, 14.3183, 14.1898, 14.2915, 14.3927, 14.4931,\n 14.3676, 14.4679, 14.3443, 14.4444, 14.5439, 14.6428, 14.7411, 14.8388,\n 14.9359, 14.8162, 14.6978, 14.5807, 14.4649, 14.5629, 14.4487, 14.3357,\n 14.4338, 14.5313, 14.4200, 14.5173, 14.4075, 14.5045, 14.3961, 14.4928,\n 14.5890, 14.4822, 14.5781, 14.6736, 14.7685, 14.8629, 14.7580, 14.6542,\n 14.5513, 14.6459, 14.7400, 14.8337, 14.7324, 14.8257, 14.7255, 14.8187,\n 14.9113, 15.0035, 15.0952, 15.1865, 15.2774, 15.1792, 15.2698, 15.3600,\n 15.4498, 15.5391, 15.4425, 15.3467, 15.2517, 15.1574, 15.2470, 15.3362,\n 15.4250, 15.5134, 15.6014, 15.6891, 15.7763, 15.8631, 15.9496, 15.8575,\n 15.7661, 15.8525, 15.9385, 15.8481, 15.9339, 16.0194, 16.1045, 16.1892,\n 16.2736, 16.1846, 16.0961, 16.1805, 16.2644, 16.3481, 16.4314, 16.5144,\n 16.5970, 16.6793, 16.7614, 16.8430, 16.9244, 17.0055, 17.0862, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.3205, 17.3999, 17.3149, 17.3941])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nSue gave to Bill a book.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.3101, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.4222, 1.5852,\n 1.5213, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 0.9623,\n 1.1028, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.3779, 1.3278, 1.4606,\n 1.5926, 1.7237, 1.6732, 1.8033, 1.7529, 1.7028, 1.6530, 1.7817,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.7408,\n 1.8660, 1.8175, 1.9419, 2.0656, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "53.9%", + "z-score": "8.24", + "p value": "8.47e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.4901, 4.6448, 4.5611, 4.4783,\n 4.6311, 4.7823, 4.7001, 4.8497, 4.9980, 5.1450, 5.2906, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.3333, 5.4747, 5.6149, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.0837, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.7155, 6.8439, 6.9714, 6.8922, 7.0187, 7.1443,\n 7.2691, 7.1904, 7.3143, 7.2363, 7.3592, 7.4813, 7.4039, 7.3271,\n 7.2510, 7.3721, 7.2966, 7.2217, 7.3419, 7.2675, 7.3869, 7.5056,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.8988, 8.0139, 8.1282, 8.2420])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe men will all leave.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.7701, -2.8043, -2.8383, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn went home.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.4664, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.6008, 1.5430, 1.6906, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.6958, 1.6398, 1.7823,\n 1.9237, 1.8676, 2.0078, 1.9518, 1.8962, 1.8411, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.8446, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.7970, 1.9291, 2.0604, 2.0083,\n 2.1386, 2.0866, 2.0350, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 1.9868, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.1886, 2.3110, 2.2620, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.1418, 2.0943, 2.2141, 2.1667,\n 2.2857, 2.4042, 2.3567, 2.3094, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.4060, 9.5520, 9.3956, 9.5406, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.6676, 9.5230, 9.6632, 9.8020, 9.6612, 9.7989, 9.6612, 9.7980,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.3288, 10.4581, 10.3280,\n 10.1999, 10.0737, 9.9495, 9.8271, 9.9570, 10.0857, 9.9656, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.7348, 10.6232, 10.7429, 10.8616, 10.7518, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.3377, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.7622, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.9561, 14.0503, 13.9585, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.4248, 14.3352, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.7023, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 15.0585, 15.1460, 15.0605, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.4103, 15.3272, 15.4126, 15.4976, 15.5823, 15.6667,\n 15.7507, 15.8344, 15.7524, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThey represented seriously to the dean Mary as a genuine linguist.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nUs love they.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "68.4%", + "z-score": "13.7", + "p value": "3.77e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.4304, 10.3257, 10.4444, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.0883, 11.1991, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.9863, 11.8937, 11.8018, 11.9060, 11.8151, 11.9187, 12.0218, 11.9319,\n 12.0345, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.5615, 12.4746, 12.5732, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.1957, 13.1129, 13.2068, 13.1246, 13.2182, 13.3113, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nIt is nice to go abroad.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.4757, 1.3480, 1.6330, 1.9096, 2.1783, 2.4398, 2.6943,\n 2.5627, 2.4351, 2.3113, 2.1909, 2.4345, 2.3163, 2.2011, 2.0889, 1.9795,\n 2.2133, 2.4422, 2.3333, 2.2269, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428,\n 1.9462, 1.8516, 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366,\n 1.4434, 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492, 1.4755,\n 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309, 1.1628, 1.0954,\n 1.0289, 1.2039, 1.1375, 1.3101, 1.2439, 1.4142, 1.5828, 1.5164, 1.4506,\n 1.3856, 1.3213, 1.4863, 1.4222, 1.3587, 1.2959, 1.2337, 1.3954, 1.5556,\n 1.4931, 1.4313, 1.3700, 1.3093, 1.4664, 1.4059, 1.3460, 1.2865, 1.2276,\n 1.1693, 1.1114, 1.0541, 0.9972, 1.1500, 1.0932, 1.0370, 1.1877, 1.3373,\n 1.4857, 1.4289, 1.3725, 1.3166, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423,\n 1.1882, 1.3318, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366, 0.8868,\n 0.8374, 0.9739, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129, 1.0465, 0.9979,\n 0.9497, 0.9017, 0.8540, 0.9858, 1.1169, 1.0690, 1.0215, 0.9742, 0.9272,\n 1.0565, 1.0096, 0.9629, 0.9165, 0.8704, 0.8245, 0.7789, 0.7336, 0.6885,\n 0.6437, 0.5991, 0.7255, 0.8513, 0.9763, 0.9313, 0.8866, 0.8422, 0.7979,\n 0.9215, 0.8773, 0.8333, 0.7896, 0.7461, 0.8682, 0.9897, 0.9461, 0.9027,\n 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "178", + "Fraction of T in Greenlist": "89.4%", + "z-score": "21", + "p value": "3.59e-98", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 5.7155, 5.9604, 6.1968, 6.4254, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.6373, 8.8039, 8.9677, 9.1287,\n 8.9265, 9.0869, 9.2447, 9.0520, 9.2091, 9.3638, 9.5163, 9.6667,\n 9.8150, 9.9613, 10.1057, 10.2482, 10.3890, 10.5280, 10.3540, 10.4926,\n 10.6296, 10.7650, 10.8989, 11.0313, 11.1622, 11.2918, 11.4201, 11.5470,\n 11.6727, 11.5128, 11.6382, 11.7624, 11.8853, 12.0071, 12.1278, 12.2474,\n 12.3660, 12.4835, 12.6000, 12.7155, 12.5669, 12.6822, 12.7965, 12.9099,\n 13.0225, 13.1341, 13.2448, 13.3547, 13.4638, 13.5721, 13.6796, 13.5401,\n 13.6474, 13.7539, 13.8597, 13.9648, 14.0691, 14.1727, 14.2756, 14.3778,\n 14.4794, 14.5803, 14.4484, 14.5492, 14.6494, 14.7489, 14.8478, 14.9461,\n 15.0437, 15.1408, 15.2374, 15.3333, 15.4287, 15.3034, 15.3987, 15.4935,\n 15.5877, 15.6814, 15.7746, 15.8673, 15.9594, 16.0511, 16.1423, 16.2330,\n 16.1133, 16.2040, 16.2941, 16.3838, 16.4731, 16.5619, 16.6503, 16.7382,\n 16.8257, 16.9127, 16.9994, 16.8846, 16.9712, 17.0574, 17.1432, 17.2286,\n 17.3136, 17.3981, 17.4824, 17.5662, 17.6497, 17.7328, 17.6224, 17.7054,\n 17.7881, 17.8704, 17.9524, 18.0340, 18.1153, 18.1962, 18.2768, 18.3571,\n 18.4370, 18.3305, 18.4104, 18.4900, 18.5693, 18.6482, 18.7268, 18.8051,\n 18.8832, 18.9609, 19.0383, 19.1154, 19.0124, 19.0895, 19.1663, 19.2428,\n 19.3190, 19.3950, 19.4706, 19.5460, 19.6211, 19.6960, 19.7705, 19.6708,\n 19.7453, 19.8196, 19.8936, 19.9674, 20.0409, 20.1141, 20.1871, 20.2599,\n 20.3324, 20.4047, 20.3078, 20.3801, 20.4521, 20.5238, 20.5954, 20.6667,\n 20.7377, 20.8086, 20.8792, 20.9496, 21.0197, 20.9256, 20.9957])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary intended John to go abroad.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998, 3.1177, 2.9439, 2.7778,\n 2.6186, 2.4659, 2.3190, 2.1776, 2.0412, 2.3116, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.0605, 1.9415, 1.8257, 1.7132, 1.6036, 1.4968, 1.3926, 1.2910,\n 1.5323, 1.7685, 1.6667, 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142,\n 1.6348, 1.5430, 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.6131, 1.8185,\n 1.7321, 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910, 1.2189,\n 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385, 0.6732, 0.6086,\n 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714, 0.6448, 0.8165, 0.7543,\n 0.9238, 0.8617, 0.8003, 0.9671, 1.1323, 1.0705, 1.0094, 1.1721, 1.1111,\n 1.2719, 1.4313, 1.3700, 1.3093, 1.2492, 1.4059, 1.3460, 1.2865, 1.2276,\n 1.1693, 1.3231, 1.4757, 1.6271, 1.7772, 1.7179, 1.6591, 1.6008, 1.7488,\n 1.6906, 1.6330, 1.5758, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410,\n 1.5842, 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.2257, 1.1746, 1.3112, 1.2603,\n 1.2096, 1.3448, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780, 1.4105, 1.5423,\n 1.4923, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472, 1.1991, 1.1513, 1.1038,\n 1.0565, 1.0096, 0.9629, 1.0911, 1.0445, 1.1717, 1.1251, 1.0788, 1.2049,\n 1.3303, 1.2839, 1.4084, 1.3620, 1.3159, 1.4393, 1.5621, 1.5159, 1.4699,\n 1.4241, 1.3786, 1.3333, 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668,\n 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.3557, 8.5206, 8.6828, 8.5057, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 9.9653, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 9.9392, 9.7989, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.7099, 10.5830, 10.7084, 10.8328, 10.9560, 10.8321,\n 10.7098, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.1990, 11.3161, 11.4323, 11.5476, 11.4345, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.3289, 12.4365, 12.5434, 12.4370, 12.5434, 12.6491,\n 12.7542, 12.6495, 12.5460, 12.6508, 12.7550, 12.8586, 12.9616, 13.0639,\n 12.9624, 13.0643, 13.1657, 13.2665, 13.3667, 13.4664, 13.5655, 13.6640,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.7599, 13.6629, 13.7599, 13.8564,\n 13.9524, 14.0479, 14.1429, 14.0475, 14.1422, 14.2364, 14.3302, 14.4234,\n 14.5162, 14.6086, 14.7005, 14.6071, 14.6987, 14.7899, 14.8807, 14.7885,\n 14.6970, 14.7877, 14.8779, 14.9677, 15.0571, 15.1461, 15.0560, 15.1448,\n 15.2332, 15.3211, 15.4087, 15.4959, 15.5828, 15.6692, 15.5808, 15.6670,\n 15.7529, 15.8384, 15.7509, 15.6641, 15.7495, 15.8345, 15.9193, 16.0036,\n 16.0877, 16.0020, 16.0858, 16.1693, 16.2525, 16.3353, 16.4178, 16.5000,\n 16.5819, 16.4976, 16.5793, 16.6607, 16.7417, 16.6584, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI remembered having kissed Mary.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI can't believe Fred won't, either.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "12.1%", + "z-score": "-4.22", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -3.2116, -3.2525, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.3918, -3.4308, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.6216, -3.6590, -3.4879, -3.3182, -3.3566, -3.3947, -3.2276, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.4428, -3.4780, -3.5131, -3.5480, -3.5827, -3.6172, -3.6515,\n -3.6856, -3.5382, -3.5725, -3.6067, -3.6407, -3.6745, -3.7082, -3.7417,\n -3.7750, -3.6310, -3.6645, -3.6979, -3.7311, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.7869, -3.8194, -3.8516, -3.8838, -3.9158,\n -3.9476, -3.9793, -4.0109, -4.0423, -4.0736, -3.9372, -3.9687, -4.0000,\n -4.0312, -4.0622, -4.0931, -4.1239, -4.1546, -4.1851, -4.2155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "78.3%", + "z-score": "10.2", + "p value": "8.3e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn wants to read Fred's story, and I also want to.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "21.4%", + "z-score": "-1.05", + "p value": "0.854", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.4316, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.5556, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.6238, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.9562, 9.8776, 9.7997, 9.7224, 9.8293, 9.7526, 9.6764,\n 9.6008, 9.5258, 9.4513, 9.5577, 9.6635, 9.7688, 9.6948, 9.6214,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.7886, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.7800, 9.7095, 9.8116, 9.9132, 10.0143, 10.1149, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.4893, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe wanted to invite someone, but we couldn't decide who to.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "26.4%", + "z-score": "0.412", + "p value": "0.34", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.1873,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.4121])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "89.9%", + "z-score": "12.4", + "p value": "7.79e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.4748, 10.6145, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.3294, 11.4579, 11.5851, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary will read Fred's story, and Joe will read Holly's.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.7807, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.0886, -0.1325, 0.0000, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.3333,\n 0.4571, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "66.7%", + "z-score": "3.33", + "p value": "0.000429", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary claimed that eating cabbage, Holly shouldn't.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 7.8320, 7.6613, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.0495, 7.9455, 8.0822, 8.2178, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.4679, 8.5896, 8.7104, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.7610, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.9319, 9.0452,\n 8.9660, 8.8874, 9.0000, 8.9221, 9.0340, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.2118, 9.1357, 9.0601, 9.1694, 9.0944, 9.2032, 9.3113,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.4432,\n 9.5485, 9.6532, 9.5808, 9.6850, 9.7886, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.2565, 10.1855, 10.2856, 10.3853,\n 10.3148, 10.2447, 10.3439, 10.4427, 10.3730, 10.3038, 10.4021, 10.3333,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary came to be introduced by the bartender and I also came to be.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "42", + "# Tokens in Greenlist": "11", + "Fraction of T in Greenlist": "26.2%", + "z-score": "0.178", + "p value": "0.429", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, 0.1782])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.3386, 7.2466,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.7268, 7.6376,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.6328, 7.5472, 7.4625, 7.5895,\n 7.7155, 7.8406, 7.9649, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 8.8978, 8.8179, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.4438, 9.3659,\n 9.2885, 9.2118, 9.3212, 9.4299, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.0523, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nIf I can, I will work on it.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -2.1172, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.3016, -2.3422, -2.3825, -2.4225, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.4495, -2.4872, -2.5247, -2.5620, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.8043, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.0453, 9.1615, 9.2768, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 9.9067, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.2790, 10.2029, 10.3065, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 10.8505, 10.9480, 11.0450, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.1218, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJoe's neuroses bother his patrons, and Sally does too.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "16.0%", + "z-score": "-1.04", + "p value": "0.851", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855, 4.9652, 5.2085, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.5626, 5.7735,\n 5.6000, 5.8068, 6.0093, 5.8424, 6.0412, 5.8797, 6.0751, 6.2668, 6.1107,\n 5.9588, 6.1477, 6.3333, 6.1859, 6.0421, 6.2251, 6.4051, 6.2651, 6.1283,\n 6.3058, 6.4807, 6.3472, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738,\n 6.6395, 6.5166, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714, 6.8641,\n 6.7583, 6.9076, 7.0553, 6.9511, 6.8483, 6.9945, 7.1393, 7.0379, 6.9378,\n 7.0812, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068, 7.2104, 7.1152, 7.2532,\n 7.3901, 7.2960, 7.2029, 7.3386, 7.4730, 7.3810, 7.2900, 7.4233, 7.5556,\n 7.4655, 7.3765, 7.5076, 7.6376, 7.5494, 7.4622, 7.5912, 7.7192, 7.6328,\n 7.5472, 7.6742, 7.8003, 7.7155, 7.6315, 7.7566, 7.8808, 7.7976, 7.7152,\n 7.8384, 7.9608, 7.8791, 7.7981, 7.9196, 8.0403, 7.9600, 7.8803, 8.0002,\n 8.1192, 8.0402, 7.9619, 8.0801, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.2772, 8.2024, 8.3162, 8.4293, 8.3550,\n 8.2813, 8.3937, 8.5054, 8.4322, 8.3595, 8.4706, 8.5810, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.5848, 8.5141, 8.6226, 8.7305, 8.6603, 8.5905, 8.6978,\n 8.8045, 8.7351, 8.6662, 8.7724, 8.8780, 8.8094, 8.7414, 8.8464, 8.9509,\n 8.8832, 8.8160, 8.9199, 9.0233, 8.9565, 8.8900, 8.9929, 9.0952, 9.0292,\n 8.9635, 9.0653, 9.1667, 9.1013, 9.0364, 9.1372, 9.2376, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI know which book Jos\u00e9 didn't read for class, and which book Lilly did it for him.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "3.0%", + "z-score": "-5.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.7294, -2.7852, -2.8402, -2.8943, -2.9475, -3.0000,\n -3.0517, -3.1027, -3.1530, -3.2026, -3.2515, -3.2998, -3.3475, -3.3947,\n -3.4412, -3.4873, -3.5327, -3.5777, -3.6222, -3.6662, -3.7097, -3.7528,\n -3.7954, -3.8376, -3.8794, -3.9208, -3.9618, -4.0024, -4.0426, -4.0825,\n -4.1220, -4.1612, -4.2000, -4.2385, -4.2767, -4.3146, -4.3521, -4.3894,\n -4.4264, -4.4630, -4.4994, -4.5356, -4.5714, -4.6070, -4.6424, -4.6775,\n -4.7123, -4.7469, -4.7813, -4.8154, -4.8493, -4.8830, -4.9165, -4.9497,\n -4.9828, -5.0156, -5.0483, -5.0807, -5.1129, -5.1450, -5.1768, -5.2085,\n -5.2400, -5.2713, -5.3024, -5.3333, -5.3641, -5.3947, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -5.8853])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThis is the book which Bob reviewed, and this is the one which Fred won't do it.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.6299, 0.5864, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "66.7%", + "z-score": "7.99", + "p value": "6.58e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.6921, 5.4958, 5.3072, 5.1257, 5.3468, 5.5626, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.0412, 5.8797, 6.0751, 6.2668, 6.4550,\n 6.6398, 6.8214, 7.0000, 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.4066, 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.7942, 7.9489, 8.1016, 7.9704, 7.8416, 7.9931])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI know which book Mag read, and which book Bob said that you hadn't.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.4631, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -1.8856,\n -1.9345, -1.7496, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.0224, -2.0642, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -1.8571, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.0943, -2.1306, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "178", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "58.4%", + "z-score": "10.3", + "p value": "3.55e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 6.9378, 7.0812, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 7.8699, 7.7778, 7.9079, 7.8168, 7.9460, 7.8558,\n 7.7667, 7.6785, 7.5912, 7.7192, 7.8463, 7.7598, 7.6742, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.6758, 7.7981, 7.9196, 8.0403, 7.9600, 8.0798, 8.1989, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 10.0701, 9.9940, 9.9184, 10.0231, 10.1273, 10.0523, 9.9778,\n 10.0814, 10.0074, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.2993])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI know which book Mag read, and which book Bob read my report that you hadn't.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.4631, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -1.8953, -1.9379, -1.9803,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -1.8571, -1.8983, -1.9392, -1.7913, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.0726, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "111", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "58.6%", + "z-score": "8.17", + "p value": "1.61e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735, 5.4611, 5.7155,\n 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569, 5.8890, 5.6614, 5.8889,\n 5.6737, 5.8966, 6.1137, 5.9106, 5.7155, 5.5277, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.9588, 5.8108, 5.6667, 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.6830, 5.8635, 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648,\n 6.9282, 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878, 7.6339,\n 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240, 8.0171, 7.9115,\n 8.0495, 7.9455, 8.0822, 7.9796, 7.8782, 7.7782, 7.9138, 8.0483, 7.9495,\n 7.8520, 7.7555, 7.6603, 7.7937, 7.6995, 7.8318, 7.7387, 7.6466, 7.7778,\n 7.9079, 8.0370, 8.1651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI'm sure I would like him to eat fruit more than I would cookies.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.6667, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.7213, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.6128, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.1689, 8.3333,\n 8.1654, 8.0017, 7.8420, 8.0064, 8.1684, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.4301, 9.5637, 9.4425, 9.3231, 9.4560, 9.5876, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.0242, 9.9146, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.3655, 12.2694, 12.1741, 12.2778,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.5176, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.2387, 15.1553, 15.0726, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nRusty talked about himself only after Mary did talk about him.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.5627, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.5621, 2.4585, 2.6713, 2.8804, 3.0861,\n 2.9824, 3.1840, 3.0817, 2.9814, 2.8830, 2.7863, 2.6914, 2.8868,\n 3.0793, 2.9848, 2.8919, 2.8006, 2.9887, 2.8983, 3.0833, 2.9938,\n 3.1760, 3.3558, 3.5333, 3.4438, 3.6187, 3.5301, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.3587, 3.5277, 3.4442, 3.3619, 3.2806, 3.4466,\n 3.3659, 3.5298, 3.4498, 3.6116, 3.7717, 3.9302, 3.8503, 4.0069,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.8512, 3.7778, 3.9263, 3.8534, 4.0004, 4.1461,\n 4.2907, 4.2178, 4.3609, 4.2885, 4.2167, 4.1457, 4.0753, 4.0056,\n 4.1464, 4.2862, 4.2167, 4.1478, 4.0795, 4.2176, 4.1498, 4.2866,\n 4.2191, 4.3548, 4.4895, 4.6232, 4.5557, 4.6883, 4.6212, 4.5547,\n 4.4887, 4.4233, 4.3583, 4.4891, 4.6191, 4.5543, 4.4901, 4.4264,\n 4.5549, 4.4915, 4.6190, 4.5560, 4.6825, 4.8083, 4.9333, 4.8702,\n 4.9943, 4.9316, 4.8693, 4.8074, 4.7460, 4.6850, 4.8076, 4.9295,\n 4.8687, 4.8083, 4.7483, 4.8690, 4.8093, 4.9292, 4.8698, 4.9889,\n 5.1073, 5.2251, 5.1657, 5.2827, 5.2235, 5.1647, 5.1063, 5.0483,\n 4.9906, 5.1064, 5.2215, 5.1640, 5.1068, 5.0499, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.2775, 5.3898, 5.5015, 5.4451, 5.5562, 5.5000,\n 5.4442, 5.3887, 5.3335, 5.2786, 5.3886, 5.4981, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "156", + "Fraction of T in Greenlist": "78.8%", + "z-score": "17.5", + "p value": "1.04e-68", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 8.1763, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.1002, 8.9456, 8.7943, 8.9443, 9.0924, 8.9455, 9.0924, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.0673, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.8321,\n 10.7098, 10.8327, 10.9546, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.4311, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.8653, 12.7569, 12.6496, 12.5434, 12.6491,\n 12.7542, 12.8586, 12.9624, 13.0656, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.6720, 13.7710, 13.8695, 13.9675, 13.8654, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.1524, 14.0530, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.4301, 14.5238, 14.6170, 14.7098, 14.8021, 14.8940, 14.9854,\n 15.0763, 15.1669, 15.2570, 15.1618, 15.2517, 15.3411, 15.4302, 15.5188,\n 15.4250, 15.3320, 15.2397, 15.3284, 15.4167, 15.5046, 15.5922, 15.6793,\n 15.7661, 15.8525, 15.9385, 16.0242, 16.1095, 16.1945, 16.2791, 16.3633,\n 16.4472, 16.3577, 16.4414, 16.5247, 16.6078, 16.6905, 16.6021, 16.5144,\n 16.4272, 16.5100, 16.5925, 16.6746, 16.7564, 16.8379, 16.9191, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.3205, 17.3999, 17.4790])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nFred talked about everything before Rusty did talk about something.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -0.9744, -1.0328,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.4162, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.4", + "p value": "1.86e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 8.9355, 9.0711, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.7074, 10.6145,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.6487, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.1502, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.2033, 12.1260, 12.2214, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.3333,\n 12.2598, 12.1867, 12.2794, 12.3718])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nJohn often meets Mary.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "180", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "29.4%", + "z-score": "1.38", + "p value": "0.0842", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 1.0141, 0.9567, 0.8997, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.0565, 1.1852, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.3771])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.4850, 9.6347, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.4926,\n 10.3237, 10.4618, 10.2976, 10.4350, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.5269, 10.3758, 10.5096, 10.6421, 10.4952, 10.6270, 10.4834, 10.6145,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.1026,\n 10.9697, 10.8388, 10.7099, 10.5830, 10.7084, 10.8328, 10.7084, 10.5859,\n 10.7098, 10.8327, 10.9546, 11.0755, 11.1954, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.6632, 11.5470, 11.6620, 11.5476, 11.6620, 11.7757,\n 11.6631, 11.5519, 11.6652, 11.7778, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.2221, 12.1164, 12.0118, 11.9083, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.2360, 12.3419, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.2541, 12.3586, 12.4625, 12.5657, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.3770, 13.2834, 13.3810, 13.2882, 13.3854, 13.2936, 13.2025, 13.2995,\n 13.3960, 13.3059, 13.2166, 13.1279, 13.0400, 13.1364, 13.2324, 13.1453,\n 13.2410, 13.1547, 13.2499, 13.1644, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.9427, 13.8613, 13.9515, 13.8707, 13.9606,\n 13.8804, 13.8007, 13.8904, 13.9797, 13.9007, 13.8222, 13.7442, 13.6667,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe problem perceives easily.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.9%", + "z-score": "0.288", + "p value": "0.387", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.5709, 10.7052, 10.5472, 10.3923,\n 10.5269, 10.3758, 10.5096, 10.3621, 10.4952, 10.6270, 10.7575, 10.8866,\n 10.7442, 10.8727, 10.7333, 10.8612, 10.9878, 11.1132, 10.9777, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 11.9138, 12.0289, 11.9062, 12.0208,\n 11.9001, 11.7809, 11.8953, 12.0089, 11.8918, 12.0049, 11.8896, 11.7757,\n 11.8885, 12.0005, 11.8885, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.4370, 12.3289, 12.2221, 12.3299, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.6508, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.7590, 12.6592, 12.7622, 12.6635, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.7755, 12.6800, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.8017, 12.7100, 12.8095, 12.9085, 12.8179, 12.7279, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 13.9042, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.2046, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.3087, 14.2282, 14.3166, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nA hundred men surrounded the fort.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.2222, -0.2765, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.5671, 1.7963, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.4163, 2.6098, 2.8006, 2.9887, 3.1743, 3.3574, 3.5382,\n 3.7166, 3.6242, 3.5333, 3.7087, 3.6187, 3.7916, 3.7025, 3.8730,\n 4.0415, 4.2080, 4.3727, 4.5356, 4.4462, 4.3580, 4.2710, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.3345, 5.2489, 5.3964, 5.3116, 5.4576, 5.3736, 5.5181, 5.6614,\n 5.8034, 5.9442, 6.0838, 6.0000, 5.9171, 5.8351, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.3517, 6.2716, 6.4040, 6.3246,\n 6.2459, 6.3770, 6.2990, 6.4291, 6.3517, 6.4807, 6.6089, 6.7361,\n 6.6591, 6.5828, 6.5072, 6.4322, 6.3580, 6.4838, 6.4101, 6.5350,\n 6.6591, 6.5857, 6.5130, 6.4409, 6.3694, 6.4923, 6.6144, 6.7358,\n 6.6645, 6.7850, 6.9048, 7.0238, 7.1421, 7.0711, 7.0006, 7.1181,\n 7.0481, 7.1647, 7.0952, 7.2111, 7.3263, 7.4409, 7.5548, 7.6681,\n 7.5988, 7.5299, 7.4616, 7.3937, 7.5061, 7.6179, 7.7291, 7.8397,\n 7.9497, 7.8820, 7.8147, 7.9241, 7.8572, 7.7908, 7.8995, 7.8335,\n 7.9415, 7.8759, 7.9833, 8.0902, 8.1966, 8.1312, 8.0663, 8.0018,\n 7.9377, 7.8740, 7.9796, 7.9162, 8.0212, 8.1258, 8.0627, 8.0000,\n 7.9377, 7.8758, 7.9796, 8.0829, 8.1858, 8.2882, 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe elected me.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "66.7%", + "z-score": "2.89", + "p value": "0.00195", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhich report that John was incompetent did he submit?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.2910, 1.5323, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.8477, 2.0455, 1.9604, 2.1546, 2.3462, 2.2611, 2.1773,\n 2.3651, 2.5504, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.3126, 2.4814, 2.6485, 2.8138, 2.9775, 3.1394, 3.0641,\n 3.2242, 3.1493, 3.0754, 3.2332, 3.1597, 3.0870, 3.0151, 2.9439,\n 2.8735, 2.8039, 2.9582, 2.8889, 2.8203, 2.9726, 2.9044, 2.8368,\n 2.9872, 3.1363, 3.2841, 3.2163, 3.3627, 3.5079, 3.6519, 3.7947,\n 3.9365, 4.0771, 4.0085, 4.1478, 4.0795, 4.0119, 4.1498, 4.0825,\n 4.0158, 3.9497, 3.8841, 3.8191, 3.7547, 3.8903, 3.8262, 3.7626,\n 3.8968, 3.8335, 3.7707, 3.9036, 4.0356, 4.1667, 4.1038, 4.2339,\n 4.3631, 4.4915, 4.6190, 4.7458, 4.8717, 4.8083, 4.9333, 4.8702,\n 4.8076, 4.9316, 4.8693, 4.8074, 4.7460, 4.6850, 4.6245, 4.5644,\n 4.6867, 4.6268, 4.5674, 4.6887, 4.6295, 4.5707, 4.6911, 4.8107,\n 4.9297, 4.8709, 4.9891, 5.1066, 5.0479, 4.9896, 5.1063, 5.2223,\n 5.1642, 5.2795, 5.2215, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.0513, 4.9953, 4.9397, 5.0529, 4.9975, 4.9425, 5.0548, 5.0000,\n 4.9455, 5.0571, 5.1681, 5.2786, 5.2241, 5.3340, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.4885, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.3283, 8.4887, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.2424, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.0750, 9.9352, 10.0701,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.7179, 10.8444,\n 10.7131, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 12.0289, 11.9062, 11.7851,\n 11.6656, 11.7809, 11.6632, 11.5470, 11.4323, 11.3189, 11.2069, 11.3228,\n 11.4378, 11.3276, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.5655,\n 11.6772, 11.7881, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 11.9144, 11.8132, 11.9213, 11.8212, 11.7222, 11.6242, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.1366, 12.0476, 12.1492, 12.0611, 11.9737, 11.8870, 11.8010,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.4065, 12.3263, 12.4223, 12.3428, 12.2638, 12.1854,\n 12.1076, 12.0302, 12.1260, 12.2214, 12.1447, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.7017, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary has always preferred lemons to limes.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.4371, 2.6681, 2.5538, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.9856, 2.8804, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.7928, 2.7005, 2.8919, 2.8006, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.9057, 3.0873, 3.0000, 3.1789, 3.3556, 3.2686, 3.1829, 3.0984,\n 3.2717, 3.4429, 3.3587, 3.2757, 3.4442, 3.6109, 3.5282, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.5443, 3.4702, 3.3968,\n 3.5496, 3.7011, 3.6279, 3.7778, 3.7051, 3.8534, 3.7812, 3.7097,\n 3.6389, 3.7852, 3.9302, 3.8596, 4.0032, 4.1457, 4.0753, 4.0056,\n 3.9365, 4.0771, 4.2167, 4.1478, 4.0795, 4.2176, 4.3547, 4.2866,\n 4.4225, 4.5573, 4.4895, 4.4222, 4.3554, 4.2893, 4.4224, 4.3566,\n 4.4887, 4.6198, 4.5542, 4.4891, 4.4246, 4.3605, 4.2970, 4.2339,\n 4.1713, 4.3004, 4.4286, 4.3661, 4.4933, 4.4312, 4.5575, 4.4956,\n 4.4342, 4.3733, 4.4983, 4.6225, 4.5617, 4.6850, 4.8076, 4.7469,\n 4.6867, 4.6268, 4.7483, 4.8690, 4.8093, 4.7500, 4.8698, 4.9889,\n 4.9297, 5.0480, 5.1657, 5.1066, 5.0479, 4.9896, 4.9317, 5.0483,\n 5.1642, 5.1064, 5.2215, 5.1640, 5.1068, 5.0499, 4.9934, 4.9373,\n 4.8815, 4.8260, 4.9397, 5.0529, 4.9975, 5.1100, 5.0548, 5.1667,\n 5.1117, 5.0571, 5.0027, 5.1137, 5.2241, 5.1698, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.7812, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.9331, 3.8636, 3.7947,\n 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.9448, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.3774, 4.5013, 4.4413, 4.5644,\n 4.5047, 4.4454, 4.3865, 4.5083, 4.4497, 4.3915, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.9305, 5.0479, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.1086, 5.2213, 5.1655, 5.2776, 5.3891, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735, 5.7177, 5.6622, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHe let the cats which were whining out.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.8716, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 1.0290, 0.9671, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.9909, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.3460, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.4105, 1.5423, 1.4923, 1.4427, 1.5731, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "156", + "Fraction of T in Greenlist": "78.4%", + "z-score": "17.4", + "p value": "4.57e-68", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.0076, 8.1763, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.8150,\n 9.6676, 9.8072, 9.9454, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 10.9777, 11.1026,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 12.1651, 12.2794, 12.3928, 12.5053, 12.6170, 12.4922,\n 12.6035, 12.7140, 12.5916, 12.4708, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.7900, 12.8978, 13.0048, 12.8889, 12.9955, 12.8813, 12.9875, 12.8749,\n 12.9807, 13.0859, 13.1904, 13.2942, 13.3974, 13.4999, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 14.0771, 14.1746, 14.2715, 14.3680, 14.4639, 14.5593, 14.6542,\n 14.7486, 14.6459, 14.7400, 14.8337, 14.7324, 14.6319, 14.5324, 14.6262,\n 14.7195, 14.8124, 14.9048, 14.9967, 15.0882, 14.9907, 15.0819, 15.1727,\n 15.0763, 15.1669, 15.2570, 15.3467, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.6070, 15.6949, 15.6014, 15.6891, 15.5965, 15.6839, 15.7709, 15.8575,\n 15.9437, 16.0296, 16.1151, 16.2003, 16.2851, 16.3695, 16.4536, 16.5374,\n 16.6208, 16.7039, 16.7866, 16.6969, 16.7794, 16.8616, 16.7728, 16.6846,\n 16.5970, 16.6793, 16.7614, 16.8430, 16.9244, 17.0055, 17.0862, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.1556, 17.2354, 17.3149, 17.3941])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhat did Bill buy?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.0647, 1.9711, 1.8791, 2.0870, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.6828, 1.6166, 1.7809, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.9066, 1.8419, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.5097, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.7085, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.5159, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.8772, 3.7417, 3.9620, 4.1779, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 6.7931, 6.6944, 6.5970, 6.7416, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.0211, 6.9282, 6.8364, 6.7456, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.5967, 7.7268, 7.6376,\n 7.5494, 7.4622, 7.5912, 7.7192, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.7831, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.5824, 8.5052, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.7284, 8.6535, 8.5792, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.2273, 10.1558, 10.0848, 10.1855, 10.1149, 10.2151,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.4021, 10.3333,\n 10.2650, 10.1970, 10.1295, 10.2273, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMary saw the boy walking toward the railroad station.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.0123, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.8759, 3.7700, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.4296, 4.3333, 4.5034, 4.4083, 4.5760, 4.7419, 4.6476,\n 4.5547, 4.7181, 4.8797, 5.0395, 4.9472, 4.8561, 4.7662, 4.9237,\n 4.8347, 4.9904, 4.9023, 5.0562, 5.2086, 5.1212, 5.0350, 5.1855,\n 5.3345, 5.4822, 5.3964, 5.3116, 5.2278, 5.3736, 5.2906, 5.4349,\n 5.3526, 5.4956, 5.6373, 5.5556, 5.4747, 5.6149, 5.7540, 5.8919,\n 5.8114, 5.7318, 5.6530, 5.7894, 5.7112, 5.8464, 5.7689, 5.9029,\n 6.0359, 5.9589, 5.8825, 6.0143, 6.1451, 6.2750, 6.1990, 6.1237,\n 6.0491, 6.1777, 6.1036, 6.2312, 6.1577, 6.2843, 6.4101, 6.3369,\n 6.2644, 6.3892, 6.5130, 6.6361, 6.5639, 6.4923, 6.4213, 6.5433,\n 6.4728, 6.5939, 6.5238, 6.6441, 6.7637, 6.6939, 6.6248, 6.7434,\n 6.8614, 6.9786, 6.9097, 6.8413, 6.7734, 6.8897, 6.8222, 6.9378,\n 6.8707, 6.8041, 6.9189, 6.8527, 6.7869, 6.7217, 6.6568, 6.5924,\n 6.5285, 6.4650, 6.4019, 6.3392, 6.2770, 6.2152, 6.3283, 6.2668,\n 6.2057, 6.3180, 6.4298, 6.5410, 6.4800, 6.4194, 6.3592, 6.4695,\n 6.4096, 6.5193, 6.4597, 6.5688, 6.6774, 6.7854, 6.7259, 6.6667,\n 6.6078, 6.7151, 6.6565, 6.7632, 6.7049, 6.8111, 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nA proof that the claim had been. made was giver that John had lied.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.2771, 1.4142, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.4713, 1.5967, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.4546, 1.4093, 1.3644, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "108", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "59.3%", + "z-score": "8.22", + "p value": "9.99e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660, 3.6566,\n 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426, 4.0415, 3.8497, 3.6667,\n 3.9279, 4.1812, 4.0056, 4.2515, 4.4907, 4.7237, 4.5547, 4.3916, 4.2339,\n 4.0814, 3.9337, 3.7905, 4.0166, 4.2378, 4.4544, 4.6664, 4.5260, 4.3894,\n 4.5968, 4.8003, 5.0000, 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140,\n 5.6830, 5.8635, 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738,\n 6.3509, 6.2302, 6.1118, 5.9954, 5.8812, 6.0469, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968, 6.3509,\n 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393, 7.0379, 7.1813,\n 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139, 8.1483, 8.2816, 8.1816,\n 8.0829, 8.2151, 8.1176, 8.0212, 7.9259, 7.8318, 7.9630, 8.0931, 8.2222])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHe attributed to a short circuit which was caused by an overloaded transducer the fire which destroyed most of my factory.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.7620, 0.8866, 0.8422, 0.7979, 0.7539, 0.8773, 0.8333,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.3557, 8.1763, 8.0018, 7.8320, 7.6667,\n 7.8355, 8.0017, 7.8420, 8.0064, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.5916, 7.4536, 7.6140, 7.4790, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.1024, 10.2283, 10.3532, 10.2375, 10.3615, 10.4846, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.2187, 11.1111, 11.2259, 11.1197, 11.2339, 11.3473,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.4065, 11.5157, 11.6242, 11.5271, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.1622, 12.0749, 11.9883,\n 12.0891, 12.0032, 11.9181, 11.8336, 11.7498, 11.8503, 11.9504, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.0824, 12.1805, 12.0990, 12.1967, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.2987, 12.3935, 12.3163, 12.4109, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.7248, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.1966, 13.1219, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe mayor regarded as being absurd the proposal to build a sidewalk from Dartmouth to Smith.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.2439, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.7154, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.5848, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.6378, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 8.9496,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.3834, 9.2376,\n 9.3811, 9.5230, 9.3811, 9.5219, 9.6612, 9.7989, 9.6612, 9.7980,\n 9.9333, 9.7989, 9.9333, 10.0664, 9.9351, 9.8058, 9.9384, 10.0698,\n 9.9433, 9.8187, 9.9495, 10.0791, 10.2075, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.9585, 10.8423,\n 10.9621, 11.0810, 10.9669, 10.8542, 10.9727, 11.0902, 11.2069, 11.0963,\n 11.2124, 11.1033, 11.2187, 11.3333, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.3555, 11.4675, 11.5788, 11.6894, 11.7992, 11.6966, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.3586, 12.2615, 12.1652, 12.2694, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 12.9011, 12.8095, 12.9085, 12.8179, 12.9165, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.5176, 13.4308, 13.5250, 13.4390, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.1074, 14.0248, 14.1149, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.7533, 14.6738, 14.5948, 14.6812, 14.6027, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI want that Bill left to remain a secret.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "69.5%", + "z-score": "13.3", + "p value": "1.75e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.4263, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI know a man who Tom drives as drives.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.6973, 6.6066, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.3333, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.1605, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.5910, 7.5094, 7.6335, 7.5526,\n 7.6758, 7.5955, 7.5161, 7.6383, 7.7597, 7.8803, 8.0002, 7.9212,\n 8.0402, 8.1585, 8.0801, 8.0024, 7.9253, 8.0427, 7.9663, 8.0829,\n 8.0070, 8.1229, 8.2381, 8.3526, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.7380, 9.6635, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.8611, 9.7886, 9.7167, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.9752, 9.9060, 10.0061, 10.1058, 10.2050, 10.3038, 10.4021, 10.5000,\n 10.4312, 10.3628, 10.4603, 10.5573, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nDrowning cats, which is against the law, are hard to rescue.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.3109, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172, 10.3510, 10.2093, 10.0701,\n 9.9333, 10.0673, 9.9333, 9.8015, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.4378, 11.3276, 11.2187, 11.3333, 11.2259, 11.3399, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.2360, 12.3419, 12.4471, 12.5517, 12.4516,\n 12.3524, 12.4567, 12.5604, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.7812, 12.6867, 12.7875, 12.6939, 12.7943, 12.7017,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.7870, 13.6990, 13.7926, 13.8857, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.1725, 14.0872, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.6416,\n 14.7293, 14.6473, 14.7348, 14.8219, 14.9086, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.0054, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMuriel said nothing else than that she had been insulted.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.2111, 0.3369, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHimself is understood by Rutherford.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.1968, 5.9214, 6.1546, 6.3805, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.6373, 8.4293, 8.2281, 8.0333,\n 8.2052, 8.0178, 8.1882, 8.0076, 7.8320, 7.6613, 7.4952, 7.3333,\n 7.1756, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.6066,\n 10.4932, 10.3812, 10.5027, 10.3923, 10.5131, 10.4042, 10.2967, 10.1905,\n 10.0855, 9.9817, 9.8792, 9.7778, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.3027, 12.2178, 12.1335, 12.0499,\n 11.9669, 11.8846, 11.8028, 11.7217, 11.6412, 11.5613, 11.6606, 11.5813,\n 11.5026, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.8928, 12.8169, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI feel that Arch will show up.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "10", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "30.0%", + "z-score": "0.365", + "p value": "0.358", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.9604, 6.1968, 6.4254, 6.1546, 6.3805, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 8.8168, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.6786, 9.8116,\n 9.9433, 10.0737, 9.9495, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 11.2069, 11.0963,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.2259, 11.3399, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 11.9213, 12.0286, 12.1353, 12.2414, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.6684, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.2093, 13.3059, 13.2166, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.8051, 13.8976,\n 13.9896, 14.0813, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.3700, 14.4591, 14.5479, 14.6362, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe proof this set is recursive is difficult.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142, 1.2702, 1.1323, 1.4444,\n 1.7457, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 2.0494, 1.9245,\n 1.8034, 1.6859, 1.9415, 1.8257, 1.7132, 1.9599, 1.8489, 2.0889, 1.9795,\n 1.8728, 2.1054, 2.3333, 2.2269, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570,\n 2.5690, 2.4689, 2.3706, 2.2743, 2.4804, 2.3851, 2.2916, 2.4930, 2.4004,\n 2.5981, 2.5064, 2.4163, 2.6098, 2.8006, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 2.8292, 2.7456, 2.9231, 2.8402, 2.7585,\n 2.9329, 2.8518, 3.0237, 2.9433, 2.8638, 3.0330, 3.2004, 3.1211, 3.2863,\n 3.2077, 3.1300, 3.2928, 3.2157, 3.3764, 3.2998, 3.2242, 3.1493, 3.3075,\n 3.2332, 3.1597, 3.3156, 3.2426, 3.3968, 3.3243, 3.2525, 3.4047, 3.5556,\n 3.4839, 3.6332, 3.5620, 3.4915, 3.6389, 3.5689, 3.7148, 3.6452, 3.5762,\n 3.5079, 3.6519, 3.5839, 3.5166, 3.6590, 3.5920, 3.7330, 3.6664, 3.6004,\n 3.7399, 3.8784, 3.8125, 3.9497, 3.8841, 3.8191, 3.9549, 3.8903, 4.0249,\n 3.9606, 3.8968, 3.8335, 3.9666, 3.9036, 3.8411, 3.9729, 3.9107, 4.0415,\n 3.9795, 3.9181, 4.0476, 4.1763, 4.1150, 4.2426, 4.1816, 4.1210, 4.2475,\n 4.1872, 4.3128, 4.2527, 4.1931, 4.1338, 4.2582, 4.1992, 4.1406, 4.2639,\n 4.2056, 4.3280, 4.2699, 4.2122, 4.3336, 4.4544, 4.3967, 4.5166, 4.4593,\n 4.4023, 4.5212, 4.4644, 4.5826, 4.5260, 4.4698, 4.4140, 4.5311, 4.4754,\n 4.4202, 4.5364, 4.4813, 4.5968, 4.5419, 4.4873, 4.6020, 4.7161, 4.6616,\n 4.7749, 4.7206, 4.6667, 4.7792, 4.7255, 4.8374, 4.7838, 4.7305, 4.6775,\n 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415, 3.7808, 3.5382,\n 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998, 3.5796, 3.3968, 3.6667,\n 3.9279, 4.1812, 4.4272, 4.6663, 4.8990, 5.1257, 5.3468, 5.1723, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.4772, 5.6805, 5.5234, 5.7229, 5.9186, 5.7664,\n 5.6183, 5.4740, 5.6667, 5.8560, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997,\n 5.3716, 5.2463, 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919,\n 6.0622, 5.9438, 5.8275, 5.7133, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.4610, 5.6220, 5.5213, 5.4222, 5.3245,\n 5.4832, 5.6401, 5.5435, 5.6986, 5.8522, 5.7566, 5.9084, 6.0587, 5.9641,\n 6.1128, 6.2601, 6.1664, 6.3122, 6.4566, 6.3640, 6.2725, 6.1820, 6.0927,\n 6.0044, 6.1470, 6.0596, 5.9732, 5.8878, 6.0288, 6.1685, 6.0838, 6.2222,\n 6.3595, 6.2755, 6.4116, 6.5465, 6.4632, 6.5970, 6.7298, 6.6471, 6.7788,\n 6.9094, 6.8274, 6.7462, 6.6658, 6.5861, 6.5072, 6.6365, 6.5583, 6.4807,\n 6.4039, 6.5320, 6.6591, 6.5828, 6.7090, 6.8343, 6.7585, 6.8828, 7.0063,\n 6.9310, 7.0537, 6.9789, 7.1007, 7.0265, 6.9529, 7.0737, 7.0007, 7.1207,\n 7.2399, 7.3584, 7.4762, 7.5933, 7.5204, 7.4482, 7.3765, 7.3054, 7.4215,\n 7.3508, 7.2807, 7.3960, 7.3263, 7.2572, 7.1885, 7.1204, 7.2348, 7.3485,\n 7.2807, 7.3937, 7.5061, 7.6179, 7.5504, 7.4833, 7.4167, 7.3506, 7.2849,\n 7.2197, 7.3305, 7.4407, 7.3758, 7.3113, 7.4208, 7.5297, 7.4655, 7.5738,\n 7.5100, 7.6177, 7.7249, 7.8316, 7.9377, 8.0433, 7.9796, 8.0847, 8.0212,\n 7.9582, 7.8956, 8.0000, 7.9377, 8.0416, 8.1449, 8.2479, 8.1858, 8.2882,\n 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nThe madrigals which Henry plays the lute and sings sound lousy.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "55", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "34.5%", + "z-score": "1.63", + "p value": "0.051", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.8729, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.6348])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "173", + "Fraction of T in Greenlist": "86.9%", + "z-score": "20.2", + "p value": "7.77e-91", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.3984,\n 8.5659, 8.7305, 8.8926, 9.0520, 9.2091, 9.3638, 9.5163, 9.6667,\n 9.8150, 9.6347, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.1840,\n 10.3237, 10.4618, 10.5982, 10.7331, 10.8666, 10.9985, 11.1291, 11.2583,\n 11.3862, 11.2286, 11.3561, 11.4823, 11.6073, 11.7311, 11.8538, 11.7031,\n 11.8254, 11.9466, 12.0667, 12.1857, 12.3037, 12.4207, 12.5367, 12.6517,\n 12.7659, 12.6240, 12.7379, 12.8508, 12.9628, 13.0740, 13.1844, 13.0477,\n 13.1578, 13.2671, 13.3755, 13.4832, 13.5901, 13.6963, 13.8017, 13.9064,\n 14.0104, 13.8804, 13.9842, 14.0873, 14.1898, 14.2915, 14.3927, 14.2667,\n 14.3676, 14.4679, 14.5676, 14.6667, 14.7651, 14.8630, 14.9603, 15.0570,\n 15.1532, 15.0325, 15.1285, 15.2240, 15.3189, 15.4133, 15.5072, 15.3898,\n 15.4835, 15.5767, 15.6694, 15.7617, 15.8534, 15.9447, 16.0355, 16.1258,\n 16.2157, 16.1026, 16.1923, 16.2816, 16.3705, 16.4589, 16.5469, 16.4364,\n 16.5243, 16.6118, 16.6988, 16.7855, 16.8717, 16.9576, 17.0430, 17.1281,\n 17.2127, 17.1059, 17.1905, 17.2747, 17.3585, 17.4420, 17.5251, 17.4205,\n 17.5035, 17.5862, 17.6685, 17.7504, 17.8320, 17.9133, 17.9942, 18.0748,\n 18.1551, 18.0536, 18.1338, 18.2137, 18.2933, 18.3725, 18.4515, 18.3519,\n 18.4308, 18.5094, 18.5876, 18.6656, 18.7432, 18.8206, 18.8977, 18.9745,\n 19.0510, 18.9541, 19.0306, 19.1067, 19.1826, 19.2582, 19.3336, 19.2384,\n 19.3137, 19.3887, 19.4634, 19.5379, 19.6122, 19.6861, 19.7599, 19.8333,\n 19.9066, 19.8137, 19.8869, 19.9598, 20.0325, 20.1049, 20.1771])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nTom picked these grapes, and I washed some turnips, and Suzie will prepare these grapes.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.6058, -0.6473, -0.6885, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhere did you go and who ate what?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.1654, 8.3267, 8.4857, 8.6424, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.0924, 8.9489,\n 9.0947, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.0810, 11.1990, 11.3161, 11.4323, 11.3189, 11.4345, 11.3228,\n 11.2124, 11.1033, 11.2187, 11.1111, 11.0047, 11.1197, 11.0147, 11.1291,\n 11.0254, 10.9229, 10.8215, 10.7211, 10.8353, 10.9488, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.5271, 11.6351,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.9249,\n 12.8359, 12.7476, 12.6601, 12.7581, 12.6713, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.8037, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.6796, 13.5985, 13.6896, 13.7803, 13.8707, 13.7904,\n 13.8804, 13.8007, 13.8904, 13.8113, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.2640, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhich boy's did we elect guardian's employer president?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "20.2%", + "z-score": "-1.16", + "p value": "0.877", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 4.9640, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 6.7338, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 7.8923, 7.7517, 7.9097, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.4065, 11.5157, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.5188, 12.6190, 12.5289, 12.6287, 12.7279, 12.8267, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 13.1364, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.7434, 13.8350, 13.9262,\n 14.0170, 13.9343, 14.0248, 14.1149, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.4126, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.7673, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nHow sane is Peter?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.9658, 0.9115, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 1.0598, 1.0105, 1.1447, 1.0954,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.5298, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.6976, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.2554, 9.3686, 9.4812, 9.3993, 9.5112, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.6148, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 10.0231, 9.9481, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.2132, 10.3154, 10.2419, 10.3435, 10.2706,\n 10.1981, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.5128, 10.4427, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.8184, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI live at the place where Route 150 crosses the River and my dad lives at the place where Route 150 crosses the Hudson River too.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "19.5%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.6473, -1.6898, -1.5396,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.6025])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.8", + "p value": "3.68e-50", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.5206, 8.6828, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.2582,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.0701,\n 9.9333, 9.7989, 9.9333, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.1999, 10.3287, 10.2030, 10.3310, 10.4579, 10.5837, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.9546, 10.8347, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.0223, 11.9213, 12.0286, 12.1353, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.7812, 12.8819, 12.9820, 13.0815, 12.9874, 13.0866,\n 12.9935, 12.9011, 13.0000, 13.0984, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.7870, 13.6990, 13.7926, 13.8857, 13.9784, 13.8914, 13.9838, 13.8976,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.3700, 14.4591, 14.5479, 14.6362, 14.7242, 14.6416,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.7406, 14.8274, 14.7468, 14.6667,\n 14.7533, 14.8396, 14.7601, 14.8462])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI live at the place where Route 150 crosses the Hudson River and my dad lives at it too.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.28", + "p value": "0.899", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 6.8127, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 7.7232, 7.9048, 8.0829, 8.2577, 8.0546, 8.2281, 8.0333,\n 7.8445, 8.0178, 8.1882, 8.0076, 8.1763, 8.3423, 8.5057, 8.3333,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.7442, 10.8727, 11.0000, 11.1261, 10.9878, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.5799, 11.6988, 11.8168,\n 11.9338, 11.8065, 11.9230, 11.7978, 11.6743, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.0089, 12.1216, 12.2336, 12.1171, 12.2286,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.6611, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.5940, 13.6950, 13.7953, 13.8952, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.2796, 14.1746, 14.0705, 14.1677, 14.0649, 13.9630, 13.8621,\n 13.7621, 13.6630, 13.5647, 13.6626, 13.7599, 13.6629, 13.7599, 13.8564,\n 13.7606, 13.6656, 13.7619, 13.8577, 13.9530, 14.0479, 14.1422, 14.2361,\n 14.3295, 14.4225, 14.5150, 14.4222, 14.5144, 14.6062, 14.6976, 14.6059,\n 14.6970, 14.7877, 14.8779, 14.9677, 14.8773, 14.9669, 15.0560, 15.1448,\n 15.2332, 15.3211, 15.4087, 15.4959, 15.5828, 15.4942, 15.5808, 15.4929,\n 15.5793, 15.6653, 15.7509, 15.8362, 15.9211, 16.0057, 16.0900, 16.0036,\n 15.9179, 16.0020, 16.0858, 16.1693, 16.2525, 16.3353, 16.4178, 16.3333,\n 16.4156, 16.3318, 16.4139, 16.4957, 16.4127, 16.4943, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWho is she trying to make up to now?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.4495,\n -2.5126, -2.5744, -2.6349, -2.6943, -2.7526, -2.8098, -2.4962, -2.5560,\n -2.6148, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.2738, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.2743, -2.3301, -2.3851, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.5820,\n -2.6302, -2.6778, -2.4715, -2.5198, -2.5675, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.7608, -2.8039, -2.8465, -2.6667, -2.7097, -2.7524, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -2.9369, -2.9762, -3.0151, -2.8536, -2.8928, -2.9317, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.1071, -3.1433, -3.1794, -3.0315, -3.0677, -3.1038,\n -2.9576, -2.9938, -3.0298, -3.0657, -3.1013, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.3754, -3.4091, -3.2705, -3.3044, -3.3381, -3.2009, -3.2348,\n -3.2685, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.2488, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 8.9178, 8.8304, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.2768, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.7044, 9.6225,\n 9.5413, 9.4608, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 9.8776, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.6665, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 10.8961,\n 10.8241, 10.7527, 10.8505, 10.9480, 11.0450, 11.1415, 11.2376, 11.3333,\n 11.2624, 11.1919, 11.1218, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWind was gotten of a plot to negotiate an honorable end to the war in Vietnam.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.8999, 1.8527, 1.8058, 1.7592, 1.8799, 1.8333,\n 1.9533, 1.9068, 2.0259, 1.9795, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nMike talked about politics yesterday to my friends.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "110", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "64.5%", + "z-score": "9.58", + "p value": "4.93e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962, 4.9010, 5.1711,\n 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997, 6.3509, 6.5672, 6.7778,\n 6.5465, 6.3255, 6.1137, 6.3254, 6.5320, 6.7338, 6.9310, 7.1241, 6.9282,\n 7.1187, 7.3054, 7.4885, 7.6681, 7.8445, 8.0178, 7.8360, 7.6594, 7.8320,\n 8.0018, 8.1689, 8.3333, 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.6424,\n 8.7970, 8.6410, 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834,\n 9.5263, 9.6676, 9.8072, 9.6632, 9.5219, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.5304, 9.4000, 9.2717, 9.4087, 9.5443, 9.4188, 9.2952, 9.4301,\n 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.7181, 9.6011, 9.7306, 9.8590,\n 9.7442, 9.6309, 9.5191, 9.4088, 9.2999, 9.1924, 9.0863, 8.9815, 9.1101,\n 9.2376, 9.3641, 9.4896, 9.6141, 9.7376, 9.6348, 9.7574, 9.6559, 9.5556,\n 9.4563, 9.5784])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nIt was expected by the reporters that the principal would fire some teacher.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.4035, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.5304, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.7744, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -2.8095, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -2.9215, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.1669, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 6.4902, 6.1968, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.5534, 9.7043, 9.5163, 9.3333,\n 9.1551, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 10.1585, 9.9969, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.3293, 11.4551, 11.5797, 11.7031,\n 11.8254, 11.9466, 11.8000, 11.6559, 11.5142, 11.3747, 11.4974, 11.6189,\n 11.7395, 11.8589, 11.9774, 12.0949, 12.2114, 12.3269, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.6493, 12.7609, 12.8717, 12.9817, 13.0909, 13.1993,\n 13.3070, 13.4139, 13.5200, 13.6255, 13.7302, 13.8342, 13.7100, 13.5873,\n 13.4661, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 14.0669, 14.1673, 14.0518, 14.1519, 14.2514, 14.3503, 14.4487, 14.5465,\n 14.6437, 14.7404, 14.8365, 14.9321, 15.0272, 15.1217, 15.2158, 15.3093,\n 15.4024, 15.4949, 15.3852, 15.2766, 15.1690, 15.0624, 15.1556, 15.2483,\n 15.3405, 15.4323, 15.5236, 15.6144, 15.7048, 15.7948, 15.6911, 15.7809,\n 15.8702, 15.9591, 16.0476, 16.1357, 16.2233, 16.3106, 16.3975, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.9105, 16.9947, 16.8953, 16.7968,\n 16.6991, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 17.1047,\n 17.1873, 17.2695, 17.1748, 17.2568, 17.3386, 17.4200, 17.5011, 17.5818,\n 17.6623, 17.7424, 17.8223, 17.9018, 17.9810, 18.0599, 18.1386, 18.2169,\n 18.2949, 18.3727, 18.2813, 18.1905, 18.1003, 18.0107, 18.0888, 18.1667,\n 18.2442, 18.3215, 18.3985, 18.4752, 18.5517, 18.6278, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhich hat did Mike quip that she never wore?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWhich girl did Mike quip never wore this hat?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.7336, 0.6885, 0.6437, 0.7703, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.8866, 0.8422, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "160", + "Fraction of T in Greenlist": "80.4%", + "z-score": "18", + "p value": "4.02e-73", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 6.7778, 6.9830, 7.1832, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 8.0546, 8.2281, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 9.0000,\n 9.1551, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 9.7119, 9.8553, 9.9969, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.5096, 10.6421, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.2510, 11.3747, 11.4974, 11.3608,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 11.9464, 12.0630,\n 12.1786, 12.0499, 12.1651, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.6035, 12.7140, 12.8237, 12.7017, 12.8110, 12.9196, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.2280, 13.3333, 13.4379, 13.3217, 13.4259, 13.5295,\n 13.6324, 13.7347, 13.8364, 13.9375, 13.8244, 13.9251, 14.0253, 13.9140,\n 14.0139, 14.1131, 14.2118, 14.3099, 14.4075, 14.5045, 14.3961, 14.4928,\n 14.5890, 14.4822, 14.5781, 14.6736, 14.7685, 14.8629, 14.9568, 15.0502,\n 14.9459, 15.0391, 15.1318, 15.0289, 15.1213, 15.2134, 15.3049, 15.3960,\n 15.4867, 15.5769, 15.4762, 15.5662, 15.6558, 15.5563, 15.6457, 15.7346,\n 15.8232, 15.9113, 15.9990, 16.0863, 15.9889, 16.0760, 16.1628, 16.0665,\n 16.1531, 16.2392, 16.3250, 16.4104, 16.4954, 16.5801, 16.4857, 16.5702,\n 16.6543, 16.5610, 16.6450, 16.7286, 16.8118, 16.8948, 16.9774, 17.0596,\n 16.9680, 17.0500, 17.1318, 17.0411, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.4369, 17.5168, 17.5963, 17.5081, 17.5875, 17.6667,\n 17.7455, 17.8241, 17.9023, 17.9803, 17.8935, 17.9714, 18.0489])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nWe donated wire for the convicts to build cages with.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "170", + "Fraction of T in Greenlist": "85.4%", + "z-score": "19.7", + "p value": "1.42e-86", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.0076, 7.8320, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.7442, 10.8727, 11.0000, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.7104, 11.8289, 11.9464, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.3769, 13.2549, 13.3609,\n 13.4661, 13.5707, 13.6746, 13.7778, 13.8803, 13.9822, 14.0835, 13.9659,\n 14.0669, 14.1673, 14.2671, 14.3663, 14.4649, 14.5629, 14.6604, 14.7573,\n 14.6437, 14.7404, 14.8365, 14.9321, 15.0272, 15.1217, 15.2158, 15.3093,\n 15.4024, 15.2924, 15.3852, 15.4776, 15.5695, 15.6609, 15.7519, 15.8424,\n 15.9324, 16.0220, 15.9153, 16.0048, 16.0938, 16.1824, 16.2705, 16.3583,\n 16.4456, 16.5325, 16.6190, 16.5153, 16.6017, 16.6877, 16.7733, 16.8585,\n 16.9434, 17.0279, 17.1120, 17.1957, 17.0948, 17.1784, 17.2616, 17.3445,\n 17.4271, 17.5093, 17.5912, 17.6727, 17.7539, 17.6556, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.2954, 18.1994, 18.2782,\n 18.3566, 18.4348, 18.5127, 18.5903, 18.6676, 18.7447, 18.8214, 18.7276,\n 18.8043, 18.8807, 18.9568, 19.0326, 19.1082, 19.1835, 19.2586, 19.3333,\n 19.2416, 19.3163, 19.3908, 19.4650, 19.5389, 19.6126, 19.6860])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI won't have some money.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "1", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-0.577", + "p value": "0.718", + "z-score_at_T": "tensor([-0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 8.9763, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.2516, 10.1749, 10.0987, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.7189, 10.8186, 10.7451, 10.8444, 10.9431, 10.8702, 10.7978, 10.7258,\n 10.8241, 10.9220, 10.8505, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.0521, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nDo you believe the claim that somebody was looking for something?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.6485, -2.6941, -2.7393, -2.7840, -2.5927,\n -2.6379, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.0317, -3.0714, -2.9035, -2.7369, -2.7775, -2.6128, -2.6536,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.4539, -3.3113, -3.3457, -3.3799, -3.4140, -3.2733, -3.3075,\n -3.3415, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.5424, -3.5753,\n -3.6080, -3.6407, -3.6731, -3.7055, -3.7376, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.7823, 2.0494, 2.3094, 2.5627, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.5621, 2.4585, 2.6713, 2.8804, 3.0861,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.7700, 3.6662, 3.5642, 3.4641,\n 3.6522, 3.5533, 3.4562, 3.3607, 3.2667, 3.1743, 3.0833, 3.2660,\n 3.4463, 3.6242, 3.8000, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730,\n 4.0415, 4.2080, 4.3727, 4.5356, 4.4462, 4.6070, 4.7662, 4.9237,\n 4.8347, 4.9904, 5.1444, 5.0562, 4.9691, 5.1212, 5.0350, 5.1855,\n 5.3345, 5.2489, 5.1643, 5.0807, 5.2278, 5.3736, 5.5181, 5.6614,\n 5.5780, 5.7199, 5.8605, 6.0000, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.4632, 6.5970, 6.7298, 6.6471, 6.7788, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.2134, 7.3402, 7.4661, 7.5910, 7.7152, 7.8384, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.4916, 9.6016, 9.5224, 9.6317, 9.7405,\n 9.8486, 9.9562, 10.0631, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.6397, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.9928, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Is the following sentence grammatically acceptable? Answer 'yes' or 'no':\nI won't ask you to believe that he tried to force me to give her any money.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "28.8%", + "z-score": "0.743", + "p value": "0.229", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "141", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "66.7%", + "z-score": "11.4", + "p value": "1.55e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.4101, 11.3204, 11.4261])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "matthews_corr_without_watermark": 0.2683870161053685, + "matthews_corr_with_watermark": 0.031219527052723135 + } + } + }, + "mrpc": { + "train": { + "results": [ + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .\nSentence 2: Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.6794,\n 0.8452, 0.7851, 0.9488, 1.1111, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.3231, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.1514, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 1.1711, 1.1183, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.4335, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.3448, 1.4792, 1.4284, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.3933, 1.5236, 1.4743, 1.6036,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.9419, 1.8935, 1.8453, 1.9686, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.0212, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 2.0726, 2.1913, 2.1444, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\nSentence 2: Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.8716, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 4.8107, 5.0186, 4.8742, 5.0779, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 7.9196, 7.8000, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.3630, 10.2743, 10.1865, 10.0995,\n 10.0133, 9.9278, 9.8430, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.0793, 11.1807, 11.1018, 11.0235, 10.9458, 10.8686,\n 10.9697, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .\nSentence 2: On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, 0.0000, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, 0.0838, 0.0418, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .\nSentence 2: Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 0.5774,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.4757, 1.7628, 1.6330, 1.5076, 1.3862, 1.6590, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.4606, 1.3525, 1.2472, 1.1446, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.0000, 1.2372, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142,\n 1.3234, 1.2344, 1.4530, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.5275,\n 1.7321, 1.9335, 2.1320, 2.3276, 2.2404, 2.1546, 2.0702, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 2.1436, 2.3238, 2.5019,\n 2.4228, 2.3448, 2.2678, 2.1918, 2.3658, 2.2902, 2.2156, 2.1420, 2.0692,\n 1.9973, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856, 2.0517, 1.9829, 1.9149,\n 1.8475, 1.7809, 1.7150, 1.6498, 1.5852, 1.5213, 1.6823, 1.6186, 1.5556,\n 1.4931, 1.4313, 1.5892, 1.5275, 1.6837, 1.6222, 1.5613, 1.7154, 1.6547,\n 1.5945, 1.5348, 1.6865, 1.6271, 1.5681, 1.7179, 1.8665, 1.8074, 1.9545,\n 1.8956, 1.8371, 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 1.8953, 2.0373,\n 2.1783, 2.3183, 2.2608, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.3891, 2.3333, 2.2780, 2.2230, 2.3570, 2.3022, 2.2478, 2.1938,\n 2.3262, 2.2723, 2.4037, 2.3500, 2.2966, 2.2436, 2.3735, 2.3206, 2.2680,\n 2.2159, 2.1640, 2.2923, 2.2406, 2.1892, 2.1381, 2.2650, 2.2140, 2.1634,\n 2.1131, 2.0631, 2.1884, 2.1385, 2.2630, 2.2132, 2.3368, 2.2871, 2.2377,\n 2.1886, 2.3110, 2.4327, 2.3835, 2.3346, 2.2860, 2.4065, 2.3580, 2.4778,\n 2.5969, 2.7154, 2.8333, 2.7844, 2.7358, 2.6874, 2.8043, 2.7560, 2.8721,\n 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.9%", + "z-score": "11", + "p value": "2.31e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.4878, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.0139, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.9935, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.0896, 9.2229, 9.1088,\n 8.9963, 8.8853, 8.7758, 8.9086, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.2609, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.2351, 9.3582, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.6307, 9.5400, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.1955, 10.1151,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.4594, 10.3827, 10.3065, 10.4097, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.6990, 10.6271, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.8333,\n 10.7637, 10.8602, 10.7910, 10.8872, 10.9829])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .\nSentence 2: PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.0516, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.7095, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.9129,\n 1.0465, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.3607, 1.4881, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.6555, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.7870, 1.7410, 1.6951, 1.8145, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "38", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "71.1%", + "z-score": "6.56", + "p value": "2.76e-11", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855, 5.4271, 5.6614, 5.4444,\n 5.6737, 5.4678, 5.6921, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 6.1584,\n 6.3594, 6.5561])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .\nSentence 2: With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.7974, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .\nSentence 2: The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.8%", + "z-score": "-2.67", + "p value": "0.996", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.4077, -2.4546, -2.5011, -2.5471, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.3552, -2.4000, -2.4444, -2.4885, -2.3120, -2.3564, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.1884, -2.2323, -2.2758, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.7341, -2.7721, -2.8098,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.7454, -2.7815, -2.8174, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.6830, -2.7187, -2.5820, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.5893, -2.6247, -2.6599, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The DVD-CCA then appealed to the state Supreme Court .\nSentence 2: The DVD CCA appealed that decision to the U.S. Supreme Court .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "23.8%", + "z-score": "-0.218", + "p value": "0.586", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094, 2.1004, 2.4495,\n 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570, 2.6558, 2.9439, 3.2222,\n 3.4915, 3.3235, 3.5839, 3.4219, 3.6742, 3.5176, 3.7626, 3.6108, 3.8490,\n 3.7017, 3.9337, 3.7905, 4.0166, 4.2378, 4.0980, 3.9620, 3.8297, 3.7009,\n 3.9158, 4.1265, 4.0000, 3.8765, 3.7559, 3.9614, 3.8431, 4.0446, 3.9284,\n 3.8146, 4.0119, 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.4371,\n 4.6188, 4.5115, 4.4061, 4.5847, 4.4809, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.6981, 4.6000, 4.7683, 4.6715, 4.8375, 4.7419, 4.6476, 4.8113,\n 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.4160, 5.3243, 5.2338,\n 5.1444, 5.0562, 5.2086, 5.1212, 5.2719, 5.1855, 5.3345, 5.4822, 5.3964,\n 5.3116, 5.4576, 5.3736, 5.2906, 5.4349, 5.3526, 5.4956, 5.4140, 5.5556,\n 5.4747, 5.6149, 5.5348, 5.6737, 5.5942, 5.5155, 5.6530, 5.7894, 5.9247,\n 6.0590, 6.1923, 6.3246, 6.2459, 6.1680, 6.2990, 6.4291, 6.3517, 6.4807,\n 6.4039, 6.5320, 6.4558, 6.3803, 6.5072, 6.4322, 6.5582, 6.6833, 6.8076,\n 6.9310, 6.8564, 6.9789, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.5495, 7.4762, 7.5933, 7.7096, 7.8253, 7.7524, 7.8673, 7.7949,\n 7.9091, 8.0227, 8.1356, 8.2479, 8.1758, 8.2874, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.4757, 8.5848, 8.5141, 8.4439, 8.5524, 8.4826, 8.5905, 8.6978,\n 8.8045, 8.9107, 8.8413, 8.9469, 8.8780, 8.9830, 9.0876, 9.1916, 9.2952,\n 9.2265, 9.3295, 9.4320, 9.5341, 9.4658, 9.5673, 9.4995, 9.6005, 9.5331,\n 9.4661, 9.5666, 9.5000, 9.6000, 9.6996, 9.7987, 9.8974, 9.8311, 9.9294,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .\nSentence 2: Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "36", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "33.3%", + "z-score": "1.15", + "p value": "0.124", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 0.6794, 0.5556,\n 0.8729, 0.7505, 1.0541, 0.9333, 0.8165, 1.1055, 1.3862, 1.2687, 1.1547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.0139, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 10.1999, 10.0737, 10.2030, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.9546, 10.8347, 10.9559, 11.0761, 10.9585, 11.0780,\n 10.9621, 11.0810, 10.9669, 10.8542, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 11.7992, 11.6966, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.2541, 12.3586, 12.2615, 12.1652, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.6012, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.3128, 13.4086, 13.3201, 13.4155, 13.5105,\n 13.4230, 13.5176, 13.4308, 13.3447, 13.2593, 13.1745, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.5781,\n 13.6698, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.3422, 14.4294, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .\nSentence 2: Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 0.5774,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.8974, 2.1776, 2.0412, 1.9096, 1.7823, 1.6590, 1.5396,\n 1.8034, 1.6859, 1.5717, 1.8257, 1.7132, 1.6036, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.2372, 1.4697, 1.6977, 1.9215, 1.8240, 1.7285,\n 1.6348, 1.8516, 1.7589, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185,\n 1.7321, 1.6471, 1.5635, 1.4812, 1.6803, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910, 1.2189,\n 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847, 0.9180, 0.8520,\n 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071, 0.6448, 0.5832, 0.7543,\n 0.6928, 0.6319, 0.5717, 0.5120, 0.6794, 0.8452, 0.7851, 0.7256, 0.6667,\n 0.6083, 0.5505, 0.4932, 0.6547, 0.5974, 0.5407, 0.4845, 0.4288, 0.5871,\n 0.5315, 0.4763, 0.4216, 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086,\n 0.4611, 0.6124, 0.5592, 0.5064, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460,\n 0.5941, 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620, 0.7001,\n 0.6513, 0.7884, 0.7396, 0.8755, 1.0105, 0.9615, 1.0954, 1.0465, 0.9979,\n 1.1305, 1.0820, 1.0338, 0.9858, 1.1169, 1.0690, 1.1991, 1.3284, 1.4570,\n 1.4087, 1.5363, 1.6632, 1.6148, 1.5667, 1.5189, 1.4713, 1.4241, 1.5492,\n 1.5020, 1.4551, 1.4084, 1.5323, 1.4857, 1.4393, 1.3933, 1.5159, 1.6378,\n 1.7592, 1.7128, 1.6667, 1.6208, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592,\n 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 5.1326, 5.3100, 5.1962,\n 5.0844, 4.9747, 4.8669, 4.7610, 4.6568, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.4610, 5.3605, 5.5213, 5.6804,\n 5.5811, 5.4832, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.0587, 6.2075, 6.1128, 6.2601, 6.4059, 6.3122, 6.2197, 6.1283,\n 6.2725, 6.1820, 6.3248, 6.4663, 6.6066, 6.7456, 6.8834, 6.7937,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.1266, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.3231, 8.2413, 8.3605, 8.4788, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.7033, 8.8179, 8.7388, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.4299, 9.3537, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.4513, 9.3774, 9.3040, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.2276, 9.3328, 9.4375, 9.5416, 9.4707, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 9.9752, 10.0753, 10.1750, 10.1058, 10.0371, 9.9687, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.2273, 10.1602, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .\nSentence 2: Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.9928, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.0529, 12.1468, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .\nSentence 2: The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: But he added group performance would improve in the second half of the year and beyond .\nSentence 2: De Sole said in the results statement that group performance would improve in the second half of the year and beyond .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.3531, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.3810, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.9585, 14.0524, 14.1458, 14.2388, 14.1481, 14.2408,\n 14.1510, 14.2433, 14.1543, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.3449, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.7113, 14.7998, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.5792, 15.4956, 15.5805, 15.4976, 15.5823, 15.6667,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 15.8378, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He told The Sun newspaper that Mr. Hussein 's daughters had British schools and hospitals in mind when they decided to ask for asylum .\nSentence 2: \" Saddam 's daughters had British schools and hospitals in mind when they decided to ask for asylum -- especially the schools , \" he told The Sun .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.7740, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.9928, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.0529, 12.1468, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Gyorgy Heizler , head of the local disaster unit , said the coach was carrying 38 passengers .\nSentence 2: The head of the local disaster unit , Gyorgy Heizler , said the coach driver had failed to heed red stop lights .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Rudder was most recently senior vice president for the Developer & Platform Evangelism Business .\nSentence 2: Senior Vice President Eric Rudder , formerly head of the Developer and Platform Evangelism unit , will lead the new entity .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -2.0158, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.3222, -1.1711, -1.2155, -1.0659, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281, 4.9075,\n 5.0844, 4.9747, 4.8669, 4.7610, 4.9348, 4.8305, 4.7278, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.2981, 5.4610, 5.3605, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.6986, 5.6032, 5.5090, 5.6622,\n 5.8139, 5.7207, 5.8707, 6.0193, 5.9270, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.5569, 6.6973, 6.8364, 6.7456, 6.6559, 6.7937,\n 6.7049, 6.8414, 6.9768, 7.1111, 7.2443, 7.1563, 7.2884, 7.4194,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.4193, 7.5472, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.3402, 7.4661, 7.5910, 7.7152, 7.8384, 7.7567,\n 7.8791, 7.7981, 7.9196, 7.8393, 7.7597, 7.8803, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.0801, 8.1976, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 8.9502, 9.0601, 8.9851, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.2704, 9.3774, 9.3040, 9.4103, 9.3374, 9.2651,\n 9.1932, 9.2990, 9.2276, 9.1567, 9.2619, 9.1915, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.5369, 9.4673, 9.5698, 9.5007, 9.6028, 9.5341,\n 9.4658, 9.5673, 9.4995, 9.6005, 9.7011, 9.8012, 9.7337, 9.8333,\n 9.9325, 9.8654, 9.9641, 10.0624, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: As well as the dolphin scheme , the chaos has allowed foreign companies to engage in damaging logging and fishing operations without proper monitoring or export controls .\nSentence 2: Internal chaos has allowed foreign companies to set up damaging commercial logging and fishing operations without proper monitoring or export controls .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "168", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "14.3%", + "z-score": "-3.21", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -3.1840, -2.9314, -2.9814, -3.0308, -3.0796, -2.8368, -2.8868,\n -2.9361, -2.9848, -3.0330, -3.0806, -3.1277, -3.1743, -3.2204, -3.2660,\n -3.3111, -3.3558, -3.4000, -3.4438, -3.4871, -3.5301, -3.3128, -3.3566,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.3619, -3.4044, -3.4466,\n -3.4883, -3.5298, -3.5708, -3.3708, -3.4125, -3.4538, -3.4949, -3.5355,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.5443, -3.5839, -3.6233,\n -3.4370, -3.2525, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.3526, -3.3918, -3.4308, -3.4694, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.2408, -3.2796, -3.3182, -3.3566, -3.1889, -3.2276, -3.2660,\n -3.3041, -3.3420, -3.3797, -3.4171, -3.4543, -3.4913, -3.3293, -3.3665,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.2998, -3.3359, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.1433, -3.1794, -3.2152, -3.2509, -3.1038,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.2811, -3.3160, -3.3508, -3.2071])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.6757, 9.7869, 9.7044, 9.6225,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.6397, 10.7423, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.7169, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Sheena Young of Child , the national infertility support network , hoped the guidelines would lead to a more \" fair and equitable \" service for infertility sufferers .\nSentence 2: Sheena Young , a spokesman for Child , the national infertility support network , said the proposed guidelines should lead to a more \" fair and equitable \" service for infertility sufferers .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "31.7%", + "z-score": "1.55", + "p value": "0.0604", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 1.1333, 1.3245, 1.2501, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.5511])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094, 2.6605, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998, 3.1177, 2.9439, 3.2222,\n 3.4915, 3.7524, 3.5839, 3.8367, 4.0825, 4.3217, 4.1586, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.7905, 4.0166, 4.2378, 4.4544, 4.3142, 4.5260, 4.3894,\n 4.5968, 4.4634, 4.6667, 4.8662, 4.7357, 4.6082, 4.8038, 4.9962, 5.1854,\n 5.0602, 4.9377, 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009,\n 5.7735, 5.6573, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968, 6.0943,\n 5.9932, 6.1471, 6.0474, 6.1996, 6.3502, 6.2517, 6.4008, 6.3035, 6.2075,\n 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.1283, 6.2725, 6.1820, 6.0927,\n 6.0044, 5.9172, 6.0596, 5.9732, 6.1143, 6.2541, 6.1685, 6.0838, 6.0000,\n 6.1383, 6.2755, 6.4116, 6.3283, 6.4632, 6.5970, 6.7298, 6.6471, 6.5653,\n 6.4842, 6.4040, 6.3246, 6.4558, 6.5861, 6.7155, 6.8439, 6.7648, 6.8922,\n 6.8138, 6.9402, 6.8624, 6.9879, 7.1125, 7.0353, 6.9587, 7.0823, 7.2051,\n 7.3271, 7.2510, 7.1755, 7.1007, 7.2217, 7.3419, 7.4613, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.6667, 7.5933, 7.5204, 7.4482, 7.3765, 7.4927, 7.6082,\n 7.7230, 7.8372, 7.7658, 7.8793, 7.8084, 7.9211, 7.8507, 7.9628, 8.0742,\n 8.0042, 7.9347, 8.0455, 7.9764, 7.9078, 8.0178, 8.1273, 8.2362, 8.1679,\n 8.2762, 8.2084, 8.3161, 8.2486, 8.1817, 8.2887, 8.3952, 8.3286, 8.4345,\n 8.3683, 8.3024, 8.2370, 8.1721, 8.2773, 8.2127, 8.3173, 8.4215, 8.3572,\n 8.2933, 8.2298, 8.3333, 8.4364, 8.5390, 8.4757, 8.5778, 8.6794, 8.7805,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" I think you 'll see a lot of job growth in the next two years , \" he said , adding the growth could replace jobs lost .\nSentence 2: \" I think you 'll see a lot of job growth in the next two years , \" said Mankiw .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "133", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "28.6%", + "z-score": "0.951", + "p value": "0.171", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.9238, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.1119, 6.3509,\n 6.0212, 6.2598, 5.9604, 5.6804, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 6.9307, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.6461, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 8.8389, 8.9815,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.7586, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 9.9469, 10.0692, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 10.8012, 10.7074, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.5495, 12.6439, 12.7378, 12.8313, 12.9244, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 12.9691, 13.0608, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The new Finder puts a user 's folders , hard drive , network servers , iDisk and removable media in one location , providing one-click access .\nSentence 2: Panther 's redesigned Finder navigation tool puts a user 's favourite folders , hard drive , network servers , iDisk and removable media in one location .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.0303, -2.0739, -1.9107, -1.9545, -1.9980, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -1.9803,\n -2.0224, -2.0642, -1.9098, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.6466, -1.6843, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.1026, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 11.8151, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.4625, 12.5615, 12.6601, 12.5732, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.2068, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.6896, 13.7803, 13.6999, 13.7904,\n 13.8804, 13.9700, 13.8904, 13.8113, 13.7327, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: But tropical storm warnings and watches were posted today for Haiti , western portions of the Dominican Republic , the southeastern Bahamas and the Turk and Caicos islands .\nSentence 2: Tropical storm warnings were in place Thursday for Jamaica and Haiti and watches for the western Dominican Republic , the southeastern Bahamas and the Turks and Caicos islands .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.8783, 1.1547, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.6353, 1.8728, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.4003, 1.5986, 1.7942, 1.9870, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.6498, 1.8116,\n 1.9720, 1.9066, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.0817, 2.2323, 2.1700, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.4271, 2.5717, 2.5103, 2.4495,\n 2.5925, 2.5318, 2.6735, 2.6131, 2.5532, 2.4938, 2.6336, 2.5744,\n 2.7129, 2.6540, 2.5954, 2.7325, 2.6742, 2.8101, 2.7520, 2.6943,\n 2.8288, 2.7713, 2.9048, 3.0373, 2.9798, 3.1113, 3.2419, 3.1844,\n 3.3140, 3.4428, 3.3853, 3.3282, 3.4558, 3.3989, 3.5256, 3.4689,\n 3.4126, 3.5382, 3.4821, 3.6067, 3.5508, 3.4953, 3.6188, 3.7417,\n 3.6862, 3.8081, 3.7528, 3.6979, 3.8189, 3.7641, 3.8843, 3.8297,\n 3.7755, 3.7216, 3.8406, 3.7869, 3.9052, 3.8516, 3.7984, 3.9158,\n 3.8627, 3.9793, 3.9265, 3.8739, 3.9896, 3.9372, 3.8851, 3.8333,\n 3.7818, 3.7306, 3.6797, 3.6291, 3.7432, 3.6927, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: A federal magistrate in Fort Lauderdale ordered him held without bail .\nSentence 2: Zuccarini was ordered held without bail Wednesday by a federal judge in Fort Lauderdale , Fla .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.5097, 1.4517, 1.6008, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.6646, 1.6081, 1.7522, 1.6958, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.8962, 1.8411, 1.9795, 1.9245,\n 2.0617, 2.0068, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.5731, 1.5236, 1.6530, 1.6036,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: A BMI of 25 or above is considered overweight ; 30 or above is considered obese .\nSentence 2: A BMI between 18.5 and 24.9 is considered normal , over 25 is considered overweight and 30 or greater is defined as obese .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.0702, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.3102, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.0785, -1.8958, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.8251, -1.8676, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "61.3%", + "z-score": "10.6", + "p value": "1.67e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.0037, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.5393, 10.4596, 10.5642, 10.4852, 10.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Six months ago , the IMF and Argentina struck a bare-minimum $ 6.8-billion debt rollover deal that expires in August .\nSentence 2: But six months ago , the two sides managed to strike a $ 6.8-billion debt rollover deal , which expires in August .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.0692, -2.1183, -2.1669, -1.9757, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.7679, -1.8086, -1.8490, -1.7049, -1.7454, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.4485, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 7.0200, 7.2296, 6.9293, 6.6469, 6.8620, 7.0711,\n 6.8127, 6.5672, 6.3333, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.3557, 8.5206, 8.6828, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.4198, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 9.8254, 9.9653, 10.1036,\n 9.9540, 9.8072, 9.6632, 9.5219, 9.6612, 9.5229, 9.3871, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.9727, 10.8616, 10.9794, 11.0963,\n 10.9870, 11.1033, 11.2187, 11.1111, 11.2259, 11.3399, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 12.0357, 11.9370, 12.0433,\n 11.9457, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 12.9085, 13.0071, 13.1050, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.9784, 13.8914, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.1906, 14.2805, 14.1974, 14.1149, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.3087, 14.3970, 14.4850, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.9195, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Inhibited children tend to be timid with new people , objects , and situations , while uninhibited children spontaneously approach them .\nSentence 2: Simply put , shy invividuals tend to be more timid with new people and situations .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "14.7%", + "z-score": "-2.49", + "p value": "0.994", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.6296, -2.6737, -2.7175,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.4885])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.7%", + "z-score": "14.5", + "p value": "4.22e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.8813, 12.9875, 12.8749,\n 12.9807, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 13.9675, 14.0649, 13.9630, 13.8621,\n 13.7621, 13.6630, 13.5647, 13.6626, 13.5654, 13.6629, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 14.0488,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.1458, 14.0550, 13.9650, 13.8756,\n 13.9690, 14.0619, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.4355, 14.5257, 14.6155, 14.7049, 14.6188, 14.5333, 14.4484,\n 14.5378, 14.4536, 14.5426, 14.6313, 14.7195, 14.6362, 14.5535, 14.4714,\n 14.5595, 14.4780, 14.5659, 14.6534, 14.7406, 14.6599, 14.5797, 14.5000,\n 14.4208, 14.3422, 14.4294, 14.5162, 14.6027, 14.5248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: I wanted to bring the most beautiful people into the most beautiful building , he said Sunday inside the Grand Central concourse .\nSentence 2: \" I wanted to bring the most beautiful people into the most beautiful building , \" Tunick said Sunday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.1375, -1.1794, -1.2210, -1.0820, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.1784, -1.0445,\n -1.0849, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321, 2.1004, 1.9052,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.8889,\n 1.7457, 1.6082, 1.4757, 1.3480, 1.6330, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.0441, 0.9366, 0.8321, 1.0954, 0.9918, 0.8909, 0.7924, 0.6963, 0.6025,\n 0.8513, 1.0948, 1.0000, 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999,\n 1.0120, 0.9258, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 1.2366,\n 1.1547, 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.5133, 1.6997, 1.6239, 1.5492, 1.4755,\n 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.7233, 1.6524, 1.8257,\n 1.7552, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499, 1.5828, 1.7496, 1.6828,\n 1.6166, 1.7809, 1.7150, 1.8773, 1.8116, 1.7467, 1.6823, 1.6186, 1.5556,\n 1.4931, 1.4313, 1.5892, 1.5275, 1.4664, 1.4059, 1.3460, 1.2865, 1.4412,\n 1.5945, 1.5348, 1.6865, 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.5430,\n 1.4857, 1.6330, 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.6958, 1.6398,\n 1.5842, 1.5291, 1.4744, 1.4201, 1.5614, 1.7018, 1.6473, 1.7864, 1.9245,\n 1.8699, 2.0068, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732, 2.0071,\n 1.9540, 1.9013, 1.8490, 1.7970, 1.9291, 2.0604, 2.0083, 2.1386, 2.0866,\n 2.0350, 2.1640, 2.1125, 2.0613, 2.0105, 1.9599, 2.0873, 2.0369, 1.9868,\n 2.1131, 2.0631, 2.1884, 2.1385, 2.0889, 2.0396, 1.9906, 1.9419, 1.8935,\n 1.8453, 1.9686, 1.9206, 1.8728, 1.8252, 1.7780, 1.7310, 1.8527, 1.9738,\n 1.9267, 2.0470, 2.0000, 1.9533, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874,\n 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The broad Standard & Poor 's 500 < .SPX > fell 10.75 points , or 1.02 percent , to 1,039.32 .\nSentence 2: The S & P 500 index was up 1.26 , or 0.1 percent , to 1,039.32 after sinking 10.75 yesterday .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "6.0%", + "z-score": "-6.18", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.4495, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.7863, -2.8368, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.3128, -3.3566,\n -3.4000, -3.4429, -3.4855, -3.5277, -3.5695, -3.6109, -3.6520, -3.6927,\n -3.7331, -3.7732, -3.8129, -3.8523, -3.8914, -3.9302, -3.9687, -4.0069,\n -4.0449, -4.0825, -4.1198, -3.9260, -3.9639, -4.0016, -4.0390, -4.0762,\n -4.1131, -4.1497, -4.1861, -4.2222, -4.2581, -4.2938, -4.3292, -4.3644,\n -4.3993, -4.4341, -4.4686, -4.5029, -4.5370, -4.5708, -4.6045, -4.6380,\n -4.6713, -4.7044, -4.7373, -4.7700, -4.8025, -4.8348, -4.8670, -4.8990,\n -4.9308, -4.9624, -4.9939, -5.0252, -5.0563, -5.0873, -5.1181, -5.1488,\n -5.1793, -5.2096, -5.2398, -5.2699, -5.2998, -5.3295, -5.3591, -5.3886,\n -5.4179, -5.4471, -5.4762, -5.5051, -5.5339, -5.5626, -5.5911, -5.6195,\n -5.6478, -5.4899, -5.5185, -5.5470, -5.5754, -5.6036, -5.6318, -5.6598,\n -5.6877, -5.7155, -5.7431, -5.5904, -5.6183, -5.6462, -5.6739, -5.7016,\n -5.7291, -5.7565, -5.7838, -5.8110, -5.8381, -5.8650, -5.8919, -5.9186,\n -5.9453, -5.9718, -5.9983, -6.0246, -6.0509, -6.0770, -6.1031, -6.1290,\n -6.1549, -6.1807, -6.2063, -6.2319, -6.2574, -6.1153, -6.1410, -6.1667,\n -6.1922, -6.2177, -6.2431, -6.2684, -6.2936, -6.3187, -6.1800])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990, 5.1962,\n 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509, 6.0212, 5.7155,\n 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569, 5.8890, 5.6614, 5.4444,\n 5.2372, 5.0389, 4.8488, 5.0811, 4.8990, 4.7237, 4.5547, 4.3916, 4.2339,\n 4.0814, 3.9337, 3.7905, 3.6515, 3.5165, 3.3853, 3.2577, 3.4816, 3.3566,\n 3.2348, 3.1160, 3.0000, 3.2167, 3.1027, 3.3147, 3.2026, 3.0929, 2.9856,\n 3.1918, 3.3947, 3.2883, 3.1840, 3.0817, 3.2796, 3.1787, 3.0796, 2.9823,\n 3.1754, 3.3657, 3.2691, 3.4562, 3.6407, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.3556, 3.2686, 3.1829, 3.0984, 3.0151,\n 3.1879, 3.3587, 3.2757, 3.4442, 3.3619, 3.5282, 3.6927, 3.6107, 3.7732,\n 3.9340, 3.8523, 3.7717, 3.6920, 3.6133, 3.5355, 3.4586, 3.3826, 3.3075,\n 3.4641, 3.6193, 3.5443, 3.4702, 3.3968, 3.3243, 3.2525, 3.1814, 3.1111,\n 3.0415, 2.9726, 2.9044, 2.8368, 2.7699, 2.7037, 2.6381, 2.5731, 2.5087,\n 2.4449, 2.3817, 2.5298, 2.4669, 2.4045, 2.3426, 2.2813, 2.4271, 2.3660,\n 2.5103, 2.4495, 2.3891, 2.3293, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348,\n 2.5744, 2.5156, 2.4574, 2.3995, 2.5373, 2.6742, 2.6163, 2.7520, 2.8868,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.6014, 2.5456, 2.4902, 2.6224, 2.7539,\n 2.6984, 2.6433, 2.5886, 2.5343, 2.6640, 2.7930, 2.7386, 2.8666, 2.8124,\n 2.7585, 2.8853, 2.8316, 2.7783, 2.7253, 2.6726, 2.7979, 2.7454, 2.6932,\n 2.8174, 2.9410, 2.8887, 3.0114, 3.1334, 3.0811, 3.0292, 2.9776, 2.9263,\n 2.8752, 2.8245, 2.7741, 2.8943, 3.0138, 2.9633, 2.9132, 2.8633, 2.9817,\n 2.9320, 2.8825, 3.0000, 2.9507, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721,\n 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Duque will return to Earth Oct. 27 with the station 's current crew , U.S. astronaut Ed Lu and Russian cosmonaut Yuri Malenchenko .\nSentence 2: Currently living onboard the space station are American astronaut Ed Lu and Russian cosmonaut Yuri Malenchenko .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "42", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "21.4%", + "z-score": "-0.535", + "p value": "0.704", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Singapore is already the United States ' 12th-largest trading partner , with two-way trade totaling more than $ 34 billion .\nSentence 2: Although a small city-state , Singapore is the 12th-largest trading partner of the United States , with trade volume of $ 33.4 billion last year .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The largest gains were seen in prices , new orders , inventories and exports .\nSentence 2: Sub-indexes measuring prices , new orders , inventories and exports increased .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, -0.1374, 0.0000,\n -0.0455, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.9897, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774, 0.4201, 0.2722,\n 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714, 0.3464, 0.6794, 0.5556,\n 0.4364, 0.7505, 0.6325, 0.5185, 0.4082, 0.3015, 0.5941, 0.4880, 0.3849,\n 0.2847, 0.5620, 0.4623, 0.7303, 0.6312, 0.5345, 0.4402, 0.3482, 0.6025,\n 0.8513, 0.7579, 0.6667, 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.4142,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547,\n 0.5774, 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164, 0.7057,\n 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385, 0.6732, 0.6086,\n 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428, 0.8793, 1.0498, 0.9864,\n 1.1547, 1.0915, 1.0290, 0.9671, 0.9058, 0.8452, 0.7851, 0.7256, 0.6667,\n 0.6083, 0.5505, 0.4932, 0.4364, 0.3802, 0.3244, 0.4845, 0.4288, 0.5871,\n 0.5315, 0.4763, 0.6325, 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143,\n 0.4611, 0.6124, 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.6983, 0.6460,\n 0.5941, 0.5426, 0.4915, 0.6366, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620, 0.5134,\n 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826, 0.1365, 0.2722,\n 0.2261, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563, 0.3109, 0.4428, 0.3974,\n 0.3522, 0.3073, 0.2626, 0.3928, 0.3482, 0.3038, 0.2596, 0.3884, 0.3443,\n 0.4721, 0.4280, 0.3841, 0.3405, 0.2971, 0.4233, 0.5489, 0.5053, 0.6299,\n 0.7539, 0.7102, 0.8333, 0.7896, 0.9119, 1.0336, 0.9897, 0.9461, 0.9027,\n 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Trading in Loral was halted yesterday ; the shares closed on Monday at $ 3.01 .\nSentence 2: The New York Stock Exchange suspended trading yesterday in Loral , which closed at $ 3.01 Friday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "28.8%", + "z-score": "0.743", + "p value": "0.229", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.7433])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.1706, 9.0629, 8.9567,\n 8.8518, 8.7482, 8.6459, 8.7757, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.1391, 10.0504, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.1494, 10.0647, 9.9807, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.1173, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Earnings per share from recurring operations will be 13 cents to 14 cents .\nSentence 2: That beat the company 's April earnings forecast of 8 to 9 cents a share .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "10.6%", + "z-score": "-4.71", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.9785, -2.7717, -2.8180, -2.8638, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.6485, -2.6941, -2.7393, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.4879, -3.5256, -3.3566, -3.3947, -3.4325, -3.2660,\n -3.3041, -3.3420, -3.3797, -3.4171, -3.2541, -3.2918, -3.3293, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -4.1303, -4.1624, -4.1944, -4.2262, -4.0822, -4.1143, -4.1461, -4.1779,\n -4.2094, -4.2409, -4.2722, -4.3033, -4.3343, -4.3652, -4.3959, -4.4265,\n -4.4570, -4.4873, -4.5175, -4.5476, -4.5776, -4.6074, -4.6371, -4.6667,\n -4.6961, -4.7255, -4.5893, -4.6188, -4.6482, -4.6775, -4.7066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 10.9488, 11.0615, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.3944, 11.5005, 11.4101, 11.5156, 11.6206, 11.7249, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.4935, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.7787, 11.8766, 11.9741, 12.0712, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.0302, 11.9534, 11.8771, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.9380, 11.8638, 11.7901, 11.7169, 11.8117, 11.9060, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 12.1347, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He plans to have dinner with troops at Kosovo 's U.S. military headquarters , Camp Bondsteel .\nSentence 2: After that , he plans to have dinner at Camp Bondsteel with U.S. troops stationed there .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.1664, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.6166, -1.4362, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.2049, -1.0729, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 11.2069, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.6772, 11.5718, 11.4675, 11.5788, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 11.9213, 11.8212, 11.7222, 11.8299, 11.7320, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.2615, 12.3655, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.6800, 12.7812, 12.6867, 12.5930, 12.5001, 12.6012, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.6990, 13.7926, 13.8857, 13.9784, 13.8914, 13.8051, 13.7194,\n 13.8120, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.0170, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.7468, 14.8333,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Retailers J.C. Penney Co . Inc . ( JCP ) and Walgreen Co . ( WAG ) kick things off on Monday .\nSentence 2: Retailers J.C. Penney Co . Inc . JCP.N and Walgreen Co . WAG.N kick things off on Monday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "66", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "18.2%", + "z-score": "-1.28", + "p value": "0.9", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.2792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.1196, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.4685, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.2348, 10.1405, 10.0472, 9.9547, 9.8632, 9.7725, 9.8877, 9.7980,\n 9.7091, 9.6210, 9.5338, 9.4474, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.2867, 9.2055, 9.1250, 9.0452,\n 8.9660, 8.8874, 8.8095, 8.7323, 8.6556, 8.5796, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.5792, 8.5054, 8.6165, 8.5433, 8.4706, 8.3984,\n 8.3268, 8.2557, 8.1851, 8.1150, 8.2252, 8.3349, 8.4439, 8.5524,\n 8.4826, 8.4133, 8.3446, 8.2762, 8.2084, 8.1410, 8.2486, 8.1817,\n 8.2887, 8.3952, 8.3286, 8.2624, 8.1966, 8.1312, 8.0663, 8.1721,\n 8.1075, 8.2127, 8.1485, 8.0847, 8.0212, 8.1258, 8.2298, 8.3333,\n 8.2702, 8.3732, 8.4757, 8.5778, 8.5148, 8.6164, 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Prosecutors filed a motion informing Lee they intend to seek the death penalty .\nSentence 2: He added that prosecutors will seek the death penalty .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.7462, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 10.9637, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.5632, 11.4762, 11.5797, 11.6827, 11.7851, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 12.0185, 11.9341, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.4872, 12.4065, 12.5024, 12.5979, 12.6930, 12.6130, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.2717, 13.3615, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Last year the court upheld Cleveland 's school voucher program , ruling 5-4 that vouchers are constitutional if they provide parents a choice of religious and secular schools .\nSentence 2: Last year , the court ruled 5-4 in an Ohio case that government vouchers are constitutional if they provide parents with choices among a range of religious and secular schools .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 1.1926, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.9604, 2.1546, 2.0702, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.9973, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856,\n 2.0517, 1.9829, 1.9149, 1.8475, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.2998, 3.5796, 3.3968, 3.6667,\n 3.9279, 4.1812, 4.0056, 4.2515, 4.0825, 3.9196, 3.7626, 3.6108, 3.4641,\n 3.7017, 3.5590, 3.4207, 3.6515, 3.8772, 3.7417, 3.6098, 3.8297, 4.0451,\n 4.2563, 4.4634, 4.6667, 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997,\n 5.3716, 5.5549, 5.4295, 5.3067, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100,\n 5.4848, 5.3709, 5.2590, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.7242, 5.6220, 5.5213, 5.6804, 5.5811,\n 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546, 6.3035, 6.4510,\n 6.5970, 6.5008, 6.4059, 6.3122, 6.4566, 6.5997, 6.5069, 6.4153, 6.3248,\n 6.4663, 6.6066, 6.7456, 6.6559, 6.7937, 6.9303, 6.8414, 6.7536, 6.6667,\n 6.8019, 6.9361, 7.0692, 7.2012, 7.1149, 7.0296, 6.9451, 6.8615, 6.9923,\n 6.9094, 7.0391, 6.9570, 7.0857, 7.2134, 7.3402, 7.4661, 7.3845, 7.3037,\n 7.4286, 7.3485, 7.4724, 7.5955, 7.5161, 7.6383, 7.7597, 7.6808, 7.8014,\n 7.7232, 7.8429, 7.7653, 7.6883, 7.6120, 7.7308, 7.6551, 7.5800, 7.5056,\n 7.6235, 7.5495, 7.6667, 7.5933, 7.7096, 7.8253, 7.9403, 7.8673, 7.9816,\n 7.9091, 7.8372, 7.7658, 7.6950, 7.6246, 7.7380, 7.6681, 7.5988, 7.7114,\n 7.8233, 7.7544, 7.6859, 7.7971, 7.9078, 8.0178, 8.1273, 8.2362, 8.3446,\n 8.2762, 8.3840, 8.4911, 8.5978, 8.7039, 8.6359, 8.7414, 8.6738, 8.6066,\n 8.5399, 8.6448, 8.5785, 8.6828, 8.6169, 8.7207, 8.6551, 8.5899, 8.6932,\n 8.6284, 8.5640, 8.6667, 8.7689, 8.8706, 8.9718, 9.0726, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He beat testicular cancer that had spread to his lungs and brain .\nSentence 2: Armstrong , 31 , battled testicular cancer that spread to his brain .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "20.0%", + "z-score": "-1.46", + "p value": "0.928", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.4313, -1.4796, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.0812, -1.1263, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Sorkin , who faces charges of conspiracy to obstruct justice and lying to a grand jury , was to have been tried separately .\nSentence 2: Sorkin was to have been tried separately on charges of conspiracy and lying to a grand jury .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.3582, 9.4803, 9.3834,\n 9.2874, 9.1925, 9.0987, 9.2202, 9.1273, 9.0354, 8.9444, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.7952, 8.7133,\n 8.6321, 8.5516, 8.4718, 8.3927, 8.3143, 8.4303, 8.3525, 8.2754,\n 8.1988, 8.1229, 8.0476, 7.9729, 7.8988, 7.8253, 7.7524, 7.6800,\n 7.7949, 7.9091, 7.8372, 7.7658, 7.6950, 7.6246, 7.5548, 7.4855,\n 7.4168, 7.3485, 7.4616, 7.3937, 7.5061, 7.4386, 7.5504, 7.4833,\n 7.4167, 7.3506, 7.2849, 7.2197, 7.1549, 7.2656, 7.3758, 7.4853,\n 7.5944, 7.5297, 7.6381, 7.5738, 7.5100, 7.4465, 7.3835, 7.3208,\n 7.2585, 7.1967, 7.1352, 7.0741, 7.0133, 6.9530, 6.8930, 6.8333,\n 6.7740, 6.7151, 6.8219, 6.7632, 6.7049, 6.8111, 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.5763, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.4574, 11.3642, 11.4714, 11.3791, 11.2877,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.7851, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.3603, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.4065, 12.5024, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.0030, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.1233, 13.2149, 13.1376, 13.2288, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.3615, 13.2864, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Graves reported from Albuquerque , Villafranca from Austin and Ratcliffe from Laredo .\nSentence 2: Pete Slover reported from Laredo and Gromer Jeffers from Albuquerque .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.6660, -1.7150, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The US chip market is expected to decline 2.1 percent this year , then grow 15.7 percent in 2004 .\nSentence 2: The Americas market will decline 2.1 percent to $ 30.6 billion in 2003 , and then grow 15.7 percent to $ 35.4 billion in 2004 .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "94", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "17.0%", + "z-score": "-1.79", + "p value": "0.963", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.7002, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.9052,\n 2.0948, 2.2819, 2.4667, 2.3842, 2.5660, 2.7456, 2.9231, 2.8402,\n 3.0151, 3.1879, 3.3587, 3.2757, 3.4442, 3.3619, 3.5282, 3.6927,\n 3.8555, 3.7732, 3.9340, 4.0931, 4.2507, 4.4066, 4.5611, 4.7140,\n 4.8655, 5.0156, 5.1643, 5.3116, 5.2278, 5.1450, 5.2906, 5.4349,\n 5.5780, 5.7199, 5.8605, 5.7778, 5.9171, 6.0553, 6.1924, 6.3283,\n 6.4632, 6.3807, 6.5144, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.7155, 6.8439, 6.7648, 6.6865, 6.6089, 6.5320,\n 6.6591, 6.5828, 6.5072, 6.6332, 6.5582, 6.4838, 6.4101, 6.5350,\n 6.4618, 6.5857, 6.5130, 6.6361, 6.7584, 6.6861, 6.8075, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.5204, 7.6368, 7.7524, 7.6800,\n 7.6082, 7.7230, 7.8372, 7.9507, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.3268, 8.4371, 8.3660, 8.2954, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.6603, 8.5905, 8.6978, 8.6284, 8.7351, 8.8413, 8.7724, 8.7039,\n 8.8094, 8.9145, 9.0190, 9.1230, 9.0549, 9.1584, 9.2613, 9.1936,\n 9.2960, 9.3980, 9.4995, 9.6005, 9.7011, 9.8012, 9.7337, 9.6667,\n 9.7663, 9.8654, 9.9641, 10.0624, 9.9957, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The group will be headed by State Department official John S. Wolf , who has served in Australia , Vietnam , Greece and Pakistan .\nSentence 2: The group will be headed by John S. Wolf , an assistant secretary of state who has served in Australia , Vietnam , Greece and Pakistan .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "42", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "21.4%", + "z-score": "-0.535", + "p value": "0.704", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The commission must work out the plan 's details , but the average residential customer paying $ 840 a year would get a savings of about $ 30 annually .\nSentence 2: An average residential customer paying $ 840 a year for electricity could see a savings of $ 30 annually .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.4140, -2.2156,\n -2.2644, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.5718, -2.6135, -2.4467, -2.2813, -2.3238, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.1268, -2.1691, -2.2111, -2.2528, -2.0948, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.8146, -2.6726,\n -2.7091, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.7187, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The company has said it plans to restate its earnings for 2000 through 2002 .\nSentence 2: The company had announced in January that it would have to restate earnings for 2002 , 2001 and perhaps 2000 .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.608", + "p value": "0.271", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.4444, 0.6083])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774, 0.4201, 0.2722,\n 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.3464, 0.2265, 0.1111,\n 0.4364, 0.3216, 0.2108, 0.5185, 0.4082, 0.3015, 0.5941, 0.8783, 0.7698,\n 1.0441, 1.3112, 1.5717, 1.8257, 1.7132, 1.6036, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.5671, 1.7963, 1.6977, 1.9215, 2.1412, 2.0428,\n 2.2576, 2.1602, 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094,\n 2.3094, 2.5064, 2.4163, 2.6098, 2.8006, 2.9887, 3.1743, 3.0833, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.5660, 2.7456, 2.6632, 2.5820, 2.7585,\n 2.9329, 2.8518, 2.7717, 2.9433, 3.1129, 3.2806, 3.4466, 3.3659, 3.2863,\n 3.2077, 3.3708, 3.5322, 3.4538, 3.3764, 3.5355, 3.4586, 3.6159, 3.5396,\n 3.6950, 3.6193, 3.5443, 3.4702, 3.3968, 3.3243, 3.2525, 3.4047, 3.5556,\n 3.4839, 3.6332, 3.7812, 3.9279, 4.0734, 4.0015, 3.9302, 3.8596, 3.7897,\n 3.7205, 3.6519, 3.5839, 3.7265, 3.6590, 3.5920, 3.7330, 3.8730, 3.8061,\n 3.7399, 3.8784, 4.0158, 4.1522, 4.2877, 4.2212, 4.1552, 4.0898, 4.2237,\n 4.3566, 4.2914, 4.2267, 4.3583, 4.2940, 4.4246, 4.3605, 4.2970, 4.2339,\n 4.1713, 4.1092, 4.0476, 3.9865, 3.9258, 4.0541, 4.1816, 4.1210, 4.2475,\n 4.3733, 4.4983, 4.6225, 4.5617, 4.5013, 4.4413, 4.3818, 4.3226, 4.2639,\n 4.2056, 4.3280, 4.2699, 4.2122, 4.3336, 4.4544, 4.3967, 4.3395, 4.4593,\n 4.5783, 4.6968, 4.8146, 4.7572, 4.7001, 4.6434, 4.7602, 4.8763, 4.8197,\n 4.7635, 4.8787, 4.8227, 4.9373, 4.8815, 4.9953, 5.1086, 5.0529, 4.9975,\n 4.9425, 4.8877, 4.8333, 4.9455, 5.0571, 5.0027, 5.1137, 5.2241, 5.3340,\n 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Results from No. 2 U.S. soft drink maker PepsiCo Inc . PEP.N were likely to be in the spotlight .\nSentence 2: Results from No. 2 U.S. soft drink maker PepsiCo Inc . ( nyse : PEP - news - people ) were likely to be in the spotlight .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962, 5.4611, 5.7155,\n 5.9604, 5.6804, 5.4175, 5.6622, 5.8989, 6.1283, 6.3509, 6.1143, 5.8889,\n 5.6737, 5.4678, 5.6921, 5.9106, 5.7155, 5.9297, 6.1389, 6.3434, 6.1584,\n 6.3594, 6.1815, 6.0093, 5.8424, 5.6805, 5.5234, 5.3708, 5.2223, 5.4222,\n 5.2778, 5.1371, 5.0000, 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.5569,\n 4.4374, 4.6291, 4.5118, 4.3970, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552,\n 3.7528, 3.6522, 3.5533, 3.4562, 3.3607, 3.2667, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.2686, 3.1829, 3.3566, 3.2717,\n 3.1879, 3.3587, 3.2757, 3.1937, 3.1129, 3.0330, 2.9542, 2.8764, 3.0429,\n 2.9656, 2.8893, 2.8138, 2.9775, 2.9025, 3.0641, 2.9897, 2.9161, 2.8433,\n 2.7713, 2.7001, 2.6296, 2.5600, 2.4910, 2.4228, 2.3552, 2.2884, 2.2222,\n 2.3779, 2.3120, 2.2468, 2.1822, 2.3354, 2.2711, 2.4227, 2.5731, 2.5087,\n 2.4449, 2.5934, 2.7406, 2.6768, 2.8226, 2.7591, 2.6961, 2.8402, 2.9832,\n 3.1251, 3.2660, 3.4058, 3.5446, 3.6824, 3.8191, 3.9549, 4.0898, 4.2237,\n 4.3566, 4.4887, 4.4233, 4.3583, 4.4891, 4.6191, 4.7481, 4.8763, 4.8113,\n 4.7467, 4.6826, 4.6190, 4.7458, 4.8717, 4.8083, 4.9333, 5.0576, 5.1810,\n 5.1177, 5.2402, 5.1772, 5.1146, 5.0525, 4.9908, 4.9295, 4.8687, 4.8083,\n 4.9292, 4.8690, 4.8093, 4.7500, 4.6911, 4.6325, 4.5744, 4.6938, 4.6359,\n 4.5783, 4.5212, 4.6395, 4.5826, 4.5260, 4.4698, 4.4140, 4.3585, 4.3033,\n 4.2485, 4.1940, 4.1399, 4.0860, 4.0325, 3.9793, 3.9265, 3.8739, 3.9896,\n 3.9372, 3.8851, 3.8333, 3.9481, 3.8964, 4.0105, 4.1239, 4.0723, 4.1851,\n 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" The result is an overall package that will provide significant economic growth for our employees over the next four years . \"\nSentence 2: \" The result is an overall package that will provide a significant economic growth for our employees over the next few years , \" he said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.5023, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.8540, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.8805, 0.8340, 0.7878, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.9415, 0.8963, 1.0215,\n 0.9763, 1.1007, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641, 3.2206, 2.9938,\n 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712, 4.0415, 4.3027, 4.1111,\n 4.3644, 4.6101, 4.4272, 4.2515, 4.0825, 4.3217, 4.1586, 4.3916, 4.6188,\n 4.8407, 5.0576, 4.8999, 5.1121, 5.3199, 5.5234, 5.3708, 5.2223, 5.0779,\n 4.9373, 5.1371, 5.3333, 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425,\n 6.6172, 6.7893, 6.6531, 6.8229, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467,\n 7.2169, 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 7.9196, 7.8000, 7.6823, 7.5664, 7.4524, 7.6000, 7.4878, 7.3773,\n 7.2684, 7.1611, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393, 7.0379, 6.9378,\n 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 6.8354, 6.9759, 6.8819, 7.0211,\n 7.1591, 7.2960, 7.2029, 7.1110, 7.2466, 7.1556, 7.2900, 7.2001, 7.3333,\n 7.2443, 7.1563, 7.0692, 6.9830, 6.8977, 7.0296, 6.9451, 6.8615, 6.7788,\n 6.6968, 6.6157, 6.7462, 6.6658, 6.5861, 6.5072, 6.4291, 6.3517, 6.2750,\n 6.1990, 6.1237, 6.0491, 6.1777, 6.3054, 6.2312, 6.1577, 6.0848, 6.2113,\n 6.3369, 6.4618, 6.5857, 6.5130, 6.4409, 6.3694, 6.2985, 6.4213, 6.3509,\n 6.4728, 6.5939, 6.7143, 6.8339, 6.9529, 6.8825, 7.0006, 6.9307, 6.8614,\n 6.9786, 6.9097, 6.8413, 6.7734, 6.8897, 7.0054, 6.9378, 6.8707, 6.9856,\n 6.9189, 7.0330, 6.9667, 7.0801, 7.0142, 6.9488, 7.0614, 6.9964, 6.9317,\n 7.0436, 6.9793, 6.9155, 6.8520, 6.7890, 6.9000, 7.0104, 7.1203, 7.0574,\n 6.9950, 6.9330, 6.8713, 6.9803, 6.9190, 6.8580, 6.9663, 7.0741, 7.1813,\n 7.2881, 7.3943, 7.3333, 7.2728, 7.2125, 7.1527, 7.2581, 7.3631, 7.4676,\n 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The songs are on offer for 99 cents each , or $ 9.99 for an album .\nSentence 2: The company will offer songs for 99 cents and albums for $ 9.95 .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 12.1125, 12.2150, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.1644, 13.2593, 13.3537, 13.4477, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.5589, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.3166, 14.4046, 14.4923, 14.5797, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: However , the talk was downplayed by PBL which said it would focus only on smaller purchases that were immediately earnings and cash flow accretive .\nSentence 2: The talk , however , has been downplayed by PBL which said it would focus only on smaller purchases that were immediately earnings and cash flow-accretive .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.0580, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.0911,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.3195, -1.3608, -1.2210, -1.0820, -1.1237, -0.9858, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.7044, -0.7462, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.8813, 12.9875, 12.8749,\n 12.9807, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 14.1677, 14.0649, 14.1618, 14.0601,\n 13.9594, 13.8595, 13.7606, 13.8578, 13.9544, 13.8567, 13.9531, 13.8564,\n 13.9524, 14.0479, 13.9524, 13.8577, 13.9530, 13.8593, 13.7663, 13.6742,\n 13.5827, 13.6781, 13.7730, 13.6826, 13.7772, 13.6876, 13.7818, 13.8756,\n 13.7870, 13.6990, 13.7926, 13.7054, 13.6188, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.5499, 13.6427, 13.5589, 13.6514, 13.7434, 13.6604, 13.5781,\n 13.6698, 13.5881, 13.5069, 13.4263, 13.3463, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.5647, 13.4871, 13.5771, 13.5000,\n 13.4234, 13.3473, 13.2717, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Comcast Class A shares were up 8 cents at $ 30.50 in morning trading on the Nasdaq Stock Market .\nSentence 2: The stock rose 48 cents to $ 30 yesterday in Nasdaq Stock Market trading .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "44", + "# Tokens in Greenlist": "10", + "Fraction of T in Greenlist": "22.7%", + "z-score": "-0.348", + "p value": "0.636", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "93", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "59.1%", + "z-score": "7.6", + "p value": "1.44e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426, 4.5033, 4.7556, 4.5556,\n 4.8008, 5.0389, 4.8488, 5.0811, 5.3072, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.6000, 5.8068, 6.0093, 6.2075, 6.0412, 6.2361, 6.4273, 6.2668, 6.4550,\n 6.6398, 6.8214, 7.0000, 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711,\n 6.9286, 6.7893, 6.9589, 6.8229, 6.9903, 7.1554, 7.0226, 6.8924, 6.7648,\n 6.9282, 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460, 7.6339,\n 7.7784, 7.6681, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855, 7.5275, 7.4247,\n 7.3233, 7.4639, 7.6033])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Washington Post said Airlite would shut down its first shift and parts of the second shift Monday to accommodate the president \u2019 s appearance .\nSentence 2: The plant plans to shut down its first shift and parts of the second shift Monday to accommodate the president 's appearance , Crosby said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.1380, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.9333, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 1.7685, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.5621, 2.7757, 2.9856, 3.1918, 3.3947,\n 3.5942, 3.7905, 3.6831, 3.8759, 3.7700, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.7242, 5.6220, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.1813, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.1556, 7.2900, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.4194,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.6328, 7.7598, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.0076, 9.9278,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.2753, 10.1968, 10.1189, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.3572, 11.4533, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: A former teammate , Carlton Dotson , has been charged with the murder .\nSentence 2: His body was found July 25 , and former teammate Carlton Dotson has been charged in his shooting death .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 12.1125, 12.2150, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.1644, 13.2593, 13.3537, 13.4477, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.5589, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.3166, 14.4046, 14.4923, 14.5797, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 12.1125, 12.2150, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.1644, 13.2593, 13.3537, 13.4477, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.5589, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.3166, 14.4046, 14.4923, 14.5797, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Several of the questions asked by the audience in the fast-paced forum were new to the candidates .\nSentence 2: Several of the audience questions were new to the candidates as well .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.5479, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.4976, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -2.1004, -2.1401, -1.9941, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.4393, -2.3094, -2.3447, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140, 4.5033, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.0811, 5.3072, 5.5277, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.0576, 5.2697, 5.4772, 5.3199, 5.1671, 5.0186, 4.8742, 5.0779,\n 5.2778, 5.4740, 5.6667, 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140,\n 5.6830, 5.5549, 5.4295, 5.6099, 5.7877, 5.6647, 5.5442, 5.4259, 5.3100,\n 5.4848, 5.6573, 5.8275, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386, 5.8377,\n 5.7382, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546, 6.3035, 6.4510,\n 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640, 6.5069, 6.4153, 6.3248,\n 6.2354, 6.1470, 6.2883, 6.4283, 6.5672, 6.7049, 6.8414, 6.7536, 6.8889,\n 6.8019, 6.7159, 6.6308, 6.5465, 6.6804, 6.8133, 6.7298, 6.8615, 6.9923,\n 6.9094, 7.0391, 6.9570, 7.0857, 7.2134, 7.3402, 7.4661, 7.5910, 7.5094,\n 7.6335, 7.5526, 7.4724, 7.3930, 7.3143, 7.4373, 7.5595, 7.4813, 7.6026,\n 7.7232, 7.6456, 7.7653, 7.6883, 7.8072, 7.9253, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.3140, 8.2381, 8.1628, 8.0880, 8.0139, 8.1282, 8.2420, 8.1683,\n 8.2813, 8.3937, 8.3205, 8.4322, 8.3595, 8.4706, 8.5810, 8.6908, 8.8000,\n 8.9086, 8.8364, 8.9444, 8.8726, 8.8013, 8.7305, 8.6603, 8.7676, 8.8744,\n 8.8045, 8.9107, 9.0164, 8.9469, 9.0520, 8.9830, 9.0876, 9.1916, 9.2952,\n 9.3982, 9.5007, 9.4320, 9.5341, 9.4658, 9.3980, 9.3306, 9.2637, 9.3651,\n 9.4661, 9.3995, 9.5000, 9.6000, 9.5338, 9.6334, 9.5675, 9.6666, 9.7653,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Meanwhile , the global death toll approached 770 with more than 8,300 people sickened since the severe acute respiratory syndrome virus first appeared in southern China in November .\nSentence 2: The global death toll from SARS was at least 767 , with more than 8,300 people sickened since the virus first appeared in southern China in November .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.3026, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.3862,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.6554, 1.7913, 1.7384, 1.8732,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.7970, 1.7454, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.9837, 1.9327, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 1.9868, 1.9370, 1.8875, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.4699, 1.5916, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.7688, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.47", + "p value": "3.92e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.8889,\n 1.7457, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 1.8034, 2.0605, 2.3113, 2.1909, 2.0738, 1.9599, 2.2011, 2.0889, 1.9795,\n 1.8728, 2.1054, 2.0000, 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285,\n 1.6348, 1.8516, 1.7589, 1.6678, 1.5785, 1.7889, 1.9959, 2.1997, 2.4004,\n 2.3094, 2.2200, 2.1320, 2.0455, 2.2404, 2.1546, 2.0702, 2.2611, 2.1773,\n 2.0948, 2.2819, 2.2000, 2.3842, 2.5660, 2.4841, 2.6632, 2.5820, 2.7585,\n 2.6778, 2.5983, 2.5198, 2.4423, 2.6148, 2.5378, 2.7080, 2.8764, 3.0429,\n 3.2077, 3.3708, 3.5322, 3.4538, 3.6133, 3.5355, 3.6931, 3.8492, 4.0038,\n 4.1569, 4.0788, 4.0016, 4.1528, 4.0762, 4.0004, 4.1497, 4.0745, 4.2222,\n 4.3687, 4.2938, 4.2196, 4.1461, 4.0734, 4.0015, 3.9302, 4.0740, 4.0032,\n 3.9331, 4.0753, 4.0056, 3.9365, 4.0771, 4.0085, 3.9404, 4.0795, 4.0119,\n 4.1498, 4.2866, 4.2191, 4.1522, 4.0859, 4.2212, 4.1552, 4.0898, 4.0249,\n 4.1586, 4.0941, 4.0301, 3.9666, 3.9036, 4.0356, 3.9729, 4.1038, 4.2339,\n 4.3631, 4.4915, 4.6190, 4.7458, 4.6825, 4.8083, 4.7454, 4.8702, 4.9943,\n 5.1177, 5.2402, 5.3621, 5.4832, 5.4199, 5.5402, 5.4772, 5.5967, 5.7155,\n 5.8336, 5.9510, 6.0678, 6.1839, 6.1207, 6.2361, 6.1732, 6.2879, 6.4019,\n 6.5153, 6.6282, 6.7404, 6.8520, 6.7890, 6.9000, 6.8373, 6.9477, 7.0574,\n 7.1667, 7.2753, 7.3835, 7.4911, 7.4283, 7.5353, 7.4729])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The battles marked day four of a U.S. sweep to hunt down supporters of Saddam Hussein 's fallen regime .\nSentence 2: Twenty-seven Iraqis were killed , pushing the number of opposition deaths to about 100 in a U.S. operation to hunt down supporters of Saddam Hussein 's fallen regime .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.2598, 5.9604, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 7.8355, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 9.9351, 9.8058, 9.9384, 10.0698,\n 9.9433, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.0810, 11.1990, 11.0851, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.5556, 11.6683, 11.5601, 11.6723, 11.5655,\n 11.6772, 11.5718, 11.4675, 11.5788, 11.6894, 11.7992, 11.6966, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.7222, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.1125, 12.0218, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.6601, 12.5732, 12.4870, 12.4015, 12.4998, 12.4150,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.3603, 12.2782, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.6757, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 12.8928, 12.9845, 12.9087, 12.8333,\n 12.9247, 12.8499, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The women then had follow-up examinations after five , 12 and 24 years .\nSentence 2: The women had follow-up examinations in 1974-75 , 1980-81 and 1992-93 , but were not asked about stress again .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.0498, -1.1025, -1.1547, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.1674, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.4679, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.3993, 9.3181, 9.4301,\n 9.3495, 9.2697, 9.3810, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.7072, 9.8131, 9.9184, 9.8433, 9.7688, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.4170, 10.5181, 10.4447,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.4281, 10.5278, 10.6271, 10.5556,\n 10.6544, 10.5833, 10.6817, 10.6111, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Embraer jets are scheduled to be delivered by September 2006 .\nSentence 2: The Bombardier and Embraer aircraft will be delivered to U.S. Airways by September 2006 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Contrary to what PeopleSoft management would have you believe , Oracle intends to fully support PeopleSoft customers and products for many years to come . \"\nSentence 2: Ellison said that contrary to the contentions of PeopleSoft management , Oracle intends to \" fully support PeopleSoft customers and products \" for many years to come .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.9829, -2.0309, -1.8475, -1.6660, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.3195, -1.1794, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.5916, -1.6292, -1.5000,\n -1.3714, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "46.7%", + "z-score": "7.08", + "p value": "7.19e-13", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.3570, 2.6558, 2.4910, 2.3333,\n 2.1822, 2.0370, 2.3190, 2.1776, 2.0412, 1.9096, 2.1783, 2.0494, 2.3094,\n 2.5627, 2.8098, 2.6811, 2.9212, 2.7952, 3.0290, 2.9055, 3.1334, 3.0123,\n 3.2348, 3.4528, 3.6667, 3.8765, 3.7559, 3.6380, 3.8431, 3.7273, 3.6141,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.3824, 3.5777, 3.7700, 3.9595, 4.1461,\n 4.0415, 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 4.8667, 4.7683, 4.9346, 5.0990, 5.2615, 5.4222, 5.5811,\n 5.4832, 5.6401, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546, 6.3035, 6.4510,\n 6.5970, 6.5008, 6.4059, 6.3122, 6.2197, 6.1283, 6.0380, 5.9488, 5.8606,\n 6.0044, 6.1470, 6.0596, 6.2008, 6.1143, 6.0288, 5.9442, 5.8605, 6.0000,\n 5.9171, 5.8351, 5.7540, 5.8919, 6.0287, 6.1644, 6.0837, 6.2183, 6.1382,\n 6.0590, 6.1923, 6.1137, 6.0359, 6.1680, 6.0908, 6.0143, 5.9386, 5.8635,\n 5.7892, 5.9196, 6.0491, 6.1777, 6.3054, 6.2312, 6.3580, 6.4838, 6.6088,\n 6.7330, 6.6591, 6.7823, 6.9048, 7.0265, 7.1474, 7.0737, 7.0007, 6.9282,\n 6.8563, 6.7850, 6.7143, 6.6441, 6.5745, 6.6939, 6.8127, 6.7434, 6.8614,\n 6.7925, 6.7242, 6.6564, 6.5891, 6.7060, 6.6391, 6.5727, 6.5067, 6.6227,\n 6.7380, 6.8527, 6.7869, 6.9009, 6.8355, 6.7706, 6.8838, 6.9964, 7.1083,\n 7.0436, 7.1549, 7.0905, 7.2012, 7.3113, 7.4208, 7.5297, 7.4655, 7.4017,\n 7.3383, 7.2753, 7.2127, 7.1506, 7.0888, 7.0273, 7.1352, 7.2425, 7.1813,\n 7.2881, 7.2272, 7.1667, 7.1065, 7.0467, 7.1527, 7.0932, 7.0340, 6.9752,\n 7.0804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Application Intelligence will be included as part of the company 's SmartDefense application , which is included with Firewall-1 .\nSentence 2: The new application intelligence features will be available June 3 and are included with the SmartDefense product , which comes with FireWall-1 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.5986, 1.7942, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.8559, 2.0247, 1.9548, 1.8856,\n 2.0517, 1.9829, 2.1470, 2.0785, 2.0107, 1.9437, 2.1049, 2.2646,\n 2.4228, 2.3552, 2.2884, 2.2222, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.9920, 1.9298, 1.8682, 1.8071, 1.7465, 1.8974,\n 2.0470, 1.9863, 2.1344, 2.0739, 2.0140, 2.1602, 2.3054, 2.2454,\n 2.1858, 2.3293, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348, 2.3764,\n 2.3183, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.0068, 2.1429, 2.2780, 2.2230, 2.3570, 2.3022, 2.2478,\n 2.3805, 2.5123, 2.4578, 2.4037, 2.5343, 2.6640, 2.7930, 2.7386,\n 2.6846, 2.6309, 2.5776, 2.5247, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.3635, 2.4877, 2.4371,\n 2.5604, 2.5099, 2.4597, 2.5820, 2.7036, 2.6534, 2.6034, 2.7240,\n 2.8440, 2.9633, 2.9132, 2.8633, 2.8137, 2.7644, 2.7154, 2.6667,\n 2.6182, 2.5700, 2.5220, 2.6393, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: American Masters : Arthur Miller , Elia Kazan and the Blacklist : None Without Sin ( Wed .\nSentence 2: Note the subheading of this terrible parable in the \" American Masters \" series , \" Arthur Miller , Elia Kazan and the Blacklist : None Without Sin . \"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.9861, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.0504, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.7822, 10.8867, 10.9906, 10.9091, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.1807, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.3249, 11.4244, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.8014, 11.8973, 11.9928, 11.9176,\n 11.8429, 11.9380, 12.0327, 11.9586, 12.0529, 12.1468, 12.2403, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The downtime , to take place in May and June , is expected to cut production by 60 million to 70 million board feet .\nSentence 2: The downtime is expected to take 60 million to 70 million board feet out of the companys system .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 1.0445, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.7467, 1.9066, 1.8419, 1.7778, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.5681, 1.5097, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.9979, 0.9497, 1.0820, 1.0338, 0.9858, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 1.0445,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: On July 3 , Troy is expected to be sentenced to life in prison without parole .\nSentence 2: Troy faces life in prison without parole at his July 30 sentencing .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The University of Michigan released a new undergraduate admission process Thursday , dropping a point system the U.S. Supreme Court found unconstitutional in June .\nSentence 2: The University of Michigan released today a new admissions policy after the U.S. Supreme Court struck down in June the way it previously admitted undergraduates .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "26.9%", + "z-score": "0.581", + "p value": "0.281", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.8907, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.6198, 0.7851, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.9733, 1.1306, 1.0721, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.8978, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.5808])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 6.9488, 6.7795, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.6948, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 9.9754, 10.1024, 9.9863, 10.1124, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.0385, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.1564, 10.0577, 9.9601, 9.8634, 9.9813, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.6307, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.0102, 11.1151, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 11.9197, 12.0180, 11.9377,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.8165, 12.7378, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The processors were announced in San Jose at the Intel Developer Forum .\nSentence 2: The new processor was unveiled at the Intel Developer Forum 2003 in San Jose , Calif .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -3.2004,\n -2.9988, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -2.8724, -2.9161, -2.9593, -2.7713, -2.8150, -2.8583, -2.9013, -2.7175,\n -2.7608, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.2276, -3.0619,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -2.8928, -2.9317, -2.9704,\n -3.0089, -3.0471, -3.0851, -3.1229, -3.1604, -3.1977, -3.0417, -3.0792,\n -3.1165, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.4780, -3.3282, -3.3637, -3.3989, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.6979, -3.7311, -3.5890, -3.4478, -3.4816,\n -3.5151, -3.5485, -3.5817, -3.6148, -3.6477, -3.5093, -3.5424, -3.5753,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.4017, -3.4346, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Justice Department filed suit Thursday against the state of Mississippi for failing to end what federal officials call \" disturbing \" abuse of juveniles and \" unconscionable \" conditions at two state-run facilities .\nSentence 2: The Justice Department filed a civil rights lawsuit Thursday against the state of Mississippi , alleging abuse of juvenile offenders at two state-run facilities .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.5775, -2.3301, -2.3851, -2.1437, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.7175, 10.8200, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.8170, 10.7415, 10.8421, 10.7671, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: It said the damage to the wing provided a pathway for hot gasses to penetrate the ship 's thermal armor during Columbia 's ill-fated reentry .\nSentence 2: The document says the damage to the wing provided a pathway for hot gases to penetrate Columbia 's thermal armour during its fatal re-entry .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -0.7333, -0.7947, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.5843, -0.6274, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426, 4.0415, 3.8497, 3.6667,\n 3.9279, 3.7524, 4.0056, 3.8367, 4.0825, 4.3217, 4.5547, 4.3916, 4.6188,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.9592, 4.8107, 4.6664, 4.8742, 5.0779,\n 5.2778, 5.4740, 5.3333, 5.5261, 5.3889, 5.2549, 5.1241, 5.3134, 5.1854,\n 5.3716, 5.2463, 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919,\n 6.0622, 5.9438, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550, 6.3509,\n 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.7469, 6.8931, 7.0379, 6.9378,\n 7.0812, 6.9824, 6.8849, 7.0268, 7.1674, 7.0711, 6.9759, 6.8819, 7.0211,\n 6.9282, 7.0662, 6.9743, 7.1110, 7.0201, 7.1556, 7.0657, 7.2001, 7.3333,\n 7.4655, 7.3765, 7.5076, 7.4194, 7.5494, 7.6785, 7.5912, 7.7192, 7.6328,\n 7.7598, 7.6742, 7.8003, 7.7155, 7.8406, 7.7566, 7.8808, 7.7976, 7.7152,\n 7.6335, 7.7567, 7.6758, 7.7981, 7.7178, 7.8393, 7.9600, 8.0798, 8.0002,\n 8.1192, 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.2365, 8.3525, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.1628, 8.2772, 8.2024, 8.1282, 8.0546, 8.1683,\n 8.0952, 8.2082, 8.1356, 8.2479, 8.3595, 8.4706, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.5848, 8.6933, 8.6226, 8.7305, 8.6603, 8.7676, 8.6978,\n 8.8045, 8.7351, 8.8413, 8.7724, 8.7039, 8.6359, 8.7414, 8.6738, 8.7788,\n 8.7116, 8.8160, 8.9199, 9.0233, 8.9565, 9.0593, 8.9929, 9.0952, 9.1971,\n 9.1310, 9.2324, 9.1667, 9.2676, 9.2022, 9.3026, 9.2376, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Also demonstrating box-office strength _ and getting seven Tony nominations _ was a potent revival of Eugene O 'Neill 's family drama , \" Long Day 's Journey Into Night . \"\nSentence 2: Also demonstrating box-office strength -- and getting seven Tony nominations -- was a potent revival of Eugene ONeills family drama , Long Days Journey Into Night . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "24.0%", + "z-score": "-0.115", + "p value": "0.546", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284, 2.6558, 2.4910, 2.7778,\n 2.6186, 2.8947, 3.1623, 3.0072, 2.8577, 2.7136, 2.9704, 3.2205, 3.0792,\n 3.3221, 3.5590, 3.4207, 3.6515, 3.5165, 3.7417, 3.6098, 3.4816, 3.7009,\n 3.9158, 3.7897, 3.6667, 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426,\n 4.1260, 4.3205, 4.2060, 4.0937, 4.2844, 4.4721, 4.3614, 4.2528, 4.4371,\n 4.6188, 4.5115, 4.6904, 4.5847, 4.7610, 4.9348, 4.8305, 4.7278, 4.8990,\n 4.7977, 4.9666, 5.1333, 5.0332, 4.9346, 4.8375, 5.0017, 4.9058, 4.8113,\n 4.9731, 5.1332, 5.0395, 5.1977, 5.1051, 5.2614, 5.4160, 5.3243, 5.2338,\n 5.3865, 5.2970, 5.4480, 5.5976, 5.7458, 5.6569, 5.8035, 5.7155, 5.6285,\n 5.5426, 5.6874, 5.8310, 5.7457, 5.8878, 5.8034, 5.9442, 6.0838, 6.2222,\n 6.1383, 6.2755, 6.1924, 6.3283, 6.2459, 6.1644, 6.2991, 6.4327, 6.3517,\n 6.4842, 6.6157, 6.5354, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.6865,\n 6.8138, 6.7361, 6.8624, 6.9879, 7.1125, 7.0353, 6.9587, 7.0823, 7.0063,\n 7.1291, 7.2510, 7.1755, 7.1007, 7.2217, 7.3419, 7.2675, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.5933, 7.7096, 7.6368, 7.7524, 7.6800, 7.6082,\n 7.7230, 7.8372, 7.7658, 7.8793, 7.9921, 8.1043, 8.0333, 7.9628, 8.0742,\n 8.0042, 8.1150, 8.2252, 8.1556, 8.0865, 8.0178, 8.1273, 8.0591, 7.9913,\n 8.1001, 8.2084, 8.1410, 8.2486, 8.3557, 8.4623, 8.3952, 8.5012, 8.4345,\n 8.3683, 8.4736, 8.5785, 8.5126, 8.6169, 8.7207, 8.8240, 8.7584, 8.6932,\n 8.7959, 8.7311, 8.8333, 8.9351, 9.0364, 8.9718, 9.0726, 9.0085, 8.9446,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: But Secretary of State Colin Powell brushed off this possibility Wednesday .\nSentence 2: Secretary of State Colin Powell last week ruled out a non-aggression treaty .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.0705, 1.2337, 1.3954, 1.3333, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.6958, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.6336, 1.5848, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.4546, 1.5752, 1.5298, 1.4846, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Thomas and Tauzin say , as do many doctors , that the Bush administration has the power to correct some of those flaws .\nSentence 2: Like many doctors , Mr. Thomas and Mr. Tauzin say the Bush administration has the power to correct some of those flaws .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "26.4%", + "z-score": "0.361", + "p value": "0.359", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Based on experience elsewhere , it could take up to two years before regular elections are held , he added .\nSentence 2: U.S. military officials have said it could take up to two years before regular elections are held , based on experiences elsewhere in the world .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.15", + "p value": "0.875", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "156", + "Fraction of T in Greenlist": "78.4%", + "z-score": "17.4", + "p value": "4.57e-68", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 10.8790, 10.9955, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.6894, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.6529, 12.7567, 12.8598,\n 12.9624, 13.0643, 12.9639, 13.0655, 12.9662, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.4674, 13.5654, 13.6629, 13.7599, 13.8564,\n 13.9524, 14.0479, 14.1429, 14.2374, 14.3314, 14.4250, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.7920, 14.8831, 14.7899, 14.6976, 14.7885,\n 14.8790, 14.9691, 15.0588, 15.1481, 15.2369, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.6754, 15.7619, 15.8481, 15.9339, 16.0194, 16.1045, 16.1892,\n 16.2736, 16.3577, 16.4414, 16.5247, 16.4361, 16.5193, 16.4314, 16.3441,\n 16.4272, 16.5100, 16.5925, 16.6746, 16.7564, 16.8379, 16.9191, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.3205, 17.3999, 17.3149, 17.3941])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The first biotechnology treatment for asthma , the constriction of the airways that affects millions around the world , received approval from the US Food and Drug Administration yesterday .\nSentence 2: The first biotechnology treatment for asthma , the constriction of the airways that affects millions of Americans , received approval from the U.S. Food and Drug Administration on Friday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "29.0%", + "z-score": "0.765", + "p value": "0.222", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 1.1446, 1.3926, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: FBI agents arrested a former partner of Big Four accounting firm Ernst & Young ERNY.UL on criminal charges of obstructing federal investigations , U.S. officials said on Thursday .\nSentence 2: A former partner of accountancy firm Ernst & Young was yesterday arrested by FBI agents in the US on charges of obstructing federal investigations .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "179", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "20.7%", + "z-score": "-1.34", + "p value": "0.91", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.5667,\n -1.6057, -1.4713, -1.3377])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.3211, 5.4909, 5.3825, 5.2760, 5.1711,\n 5.0680, 4.9666, 5.1333, 5.2981, 5.4610, 5.6220, 5.7812, 5.9386,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.1283,\n 6.2725, 6.1820, 6.3248, 6.4663, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.5807, 6.7159, 6.8500, 6.9830,\n 6.8977, 6.8133, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 7.8791, 7.7981, 7.7178, 7.8393, 7.9600, 7.8803, 7.8014, 7.7232,\n 7.6456, 7.7653, 7.6883, 7.8072, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.9729, 8.0880, 8.2024, 8.3162, 8.4293,\n 8.3550, 8.2813, 8.3937, 8.5054, 8.6165, 8.7270, 8.8369, 8.9461,\n 9.0548, 9.1629, 9.0895, 9.0167, 9.1242, 9.0518, 8.9800, 9.0869,\n 9.1932, 9.1218, 9.2276, 9.1567, 9.2619, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 10.0679, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.2273, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Kelly will begin meetings with Russian Deputy Foreign Minister Alexander Losyukov in Washington on Monday .\nSentence 2: Russian Deputy Foreign Minister Alexander Losyukov said in Moscow Tuesday a firm date would be fixed by this months end .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321, 1.5403, 1.9052,\n 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.1822, 2.4659, 2.3190, 2.5924, 2.8577, 3.1156, 3.3665, 3.6108, 3.8490,\n 4.0814, 4.3083, 4.1603, 4.0166, 4.2378, 4.0980, 3.9620, 4.1779, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.8662, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.3970, 4.5850, 4.7703, 4.6571, 4.5461, 4.7281,\n 4.6188, 4.7980, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.0332, 4.9346, 5.0990, 5.2615, 5.1640, 5.0679,\n 5.2281, 5.1332, 5.2915, 5.4482, 5.6032, 5.7566, 5.9084, 6.0587, 5.9641,\n 5.8707, 5.7785, 5.6875, 5.5976, 5.5088, 5.4212, 5.5690, 5.7155, 5.6285,\n 5.5426, 5.6874, 5.6023, 5.7457, 5.8878, 6.0288, 6.1685, 6.3070, 6.4444,\n 6.3595, 6.2755, 6.1924, 6.1101, 6.0287, 5.9481, 5.8684, 5.7894, 5.9247,\n 6.0590, 5.9806, 5.9029, 6.0359, 5.9589, 6.0908, 6.0143, 6.1451, 6.2750,\n 6.1990, 6.1237, 6.0491, 5.9752, 5.9019, 6.0302, 6.1577, 6.0848, 6.2113,\n 6.3369, 6.4618, 6.5857, 6.7089, 6.6361, 6.5639, 6.4923, 6.4213, 6.3509,\n 6.2810, 6.2116, 6.3333, 6.4543, 6.3853, 6.3168, 6.4368, 6.3688, 6.4880,\n 6.6064, 6.7242, 6.8413, 6.9577, 7.0735, 7.0054, 6.9378, 6.8707, 6.8041,\n 6.7380, 6.6724, 6.6072, 6.7217, 6.8355, 6.7706, 6.7061, 6.8192, 6.7551,\n 6.8675, 6.9793, 7.0905, 7.2012, 7.3113, 7.4208, 7.3566, 7.2929, 7.2296,\n 7.1667, 7.1041, 7.0420, 6.9803, 7.0888, 7.1967, 7.1352, 7.0741, 7.1813,\n 7.1205, 7.2272, 7.3333, 7.4390, 7.5441, 7.6488, 7.7530, 7.6922, 7.6317,\n 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The latest shooting linked to the spree was a November 11 shooting at Hamilton Central Elementary School in Obetz , about 3km from the freeway .\nSentence 2: Another shooting linked to the spree occurred Nov. 11 at Hamilton Central Elementary in Obetz , about two miles from the freeway .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.9795, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.4930, 2.6914, 2.8868,\n 2.7928, 2.9848, 3.1741, 3.0806, 2.9887, 2.8983, 3.0833, 2.9938,\n 2.9057, 3.0873, 3.0000, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 2.8518, 2.7717, 2.9433, 3.1129, 3.0330, 2.9542,\n 3.1211, 3.0429, 2.9656, 3.1300, 3.0533, 2.9775, 2.9025, 3.0641,\n 3.2242, 3.3826, 3.3075, 3.4641, 3.6193, 3.5443, 3.4702, 3.3968,\n 3.5496, 3.4768, 3.4047, 3.5556, 3.4839, 3.4130, 3.5620, 3.7097,\n 3.8562, 4.0015, 3.9302, 3.8596, 3.7897, 3.9331, 4.0753, 4.0056,\n 3.9365, 4.0771, 4.0085, 3.9404, 4.0795, 4.0119, 3.9448, 3.8784,\n 4.0158, 4.1522, 4.2877, 4.2212, 4.3554, 4.4888, 4.4224, 4.3566,\n 4.2914, 4.4233, 4.3583, 4.2940, 4.4246, 4.3605, 4.2970, 4.4264,\n 4.5549, 4.4915, 4.4286, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.4342, 4.3733, 4.3128, 4.4376, 4.3774, 4.3176, 4.4413, 4.3818,\n 4.3226, 4.2639, 4.3865, 4.5083, 4.6295, 4.5707, 4.6911, 4.8107,\n 4.7520, 4.6938, 4.6359, 4.7544, 4.6968, 4.6395, 4.7572, 4.7001,\n 4.6434, 4.7602, 4.8763, 4.9918, 5.1068, 5.0499, 4.9934, 4.9373,\n 5.0513, 5.1647, 5.1086, 5.0529, 5.1655, 5.1100, 5.0548, 5.1667,\n 5.1117, 5.0571, 5.0027, 5.1137, 5.2241, 5.3340, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.3570, 2.6558, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.5839, 3.8367, 4.0825, 3.9196, 3.7626, 4.0012, 4.2339,\n 4.0814, 4.3083, 4.5301, 4.7469, 4.9592, 5.1671, 5.0186, 5.2223, 5.0779,\n 5.2778, 5.1371, 5.3333, 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283,\n 5.9944, 6.1721, 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648,\n 6.6395, 6.8031, 6.6803, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714, 7.1207,\n 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.2421, 7.1393, 7.0379, 6.9378,\n 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068, 7.2104, 7.3485, 7.2532,\n 7.3901, 7.5258, 7.6603, 7.7937, 7.9259, 7.8318, 7.7387, 7.8699, 7.7778,\n 7.6867, 7.5967, 7.5076, 7.4194, 7.5494, 7.6785, 7.8065, 7.7192, 7.8463,\n 7.7598, 7.8859, 7.8003, 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.3324,\n 8.2483, 8.3691, 8.2858, 8.2032, 8.1214, 8.0403, 7.9600, 8.0798, 8.1989,\n 8.3172, 8.2375, 8.3550, 8.2760, 8.3927, 8.3143, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.7681, 8.6921, 8.6166, 8.5417,\n 8.4674, 8.5792, 8.6903, 8.8008, 8.7270, 8.8369, 8.7636, 8.8728, 8.8000,\n 8.9086, 9.0167, 9.1242, 9.2311, 9.3374, 9.2651, 9.1932, 9.2990, 9.2276,\n 9.1567, 9.0863, 9.0164, 8.9469, 9.0520, 9.1566, 9.2607, 9.1916, 9.2952,\n 9.2265, 9.3295, 9.2613, 9.3638, 9.4658, 9.5673, 9.6684, 9.7690, 9.7011,\n 9.6336, 9.7337, 9.6667, 9.6000, 9.5338, 9.4680, 9.4026, 9.5021, 9.6011,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Dow Jones Industrial Average ended down 128 points , or 1.4 % , at 9073 , while the Nasdaq fell 34 points , or 2.1 % , to 1610 .\nSentence 2: In early trading , the Dow Jones industrial average was up 3.90 , or 0.04 percent , at 9,113.75 , having gained 36.90 on Tuesday .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "158", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.56", + "p value": "0.941", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.4077, -2.2151, -2.2629, -2.0732, -2.1213,\n -1.9345, -1.9829, -1.7988, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.5620, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.0177, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.2607, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: PDC will also almost certainly fan the flames of speculation about Longhorn 's release .\nSentence 2: PDC will also almost certainly reignite speculation about release dates of Microsoft 's new products .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "91", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "33.0%", + "z-score": "1.76", + "p value": "0.0396", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 2.1546, 2.3462, 2.2611, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.7951, 1.7233,\n 1.8972, 1.8257, 1.7552])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Sales - a figure watched closely as a barometer of its health - rose 5 percent instead of falling as many industry experts had predicted .\nSentence 2: It also disclosed that sales -- a figure closely watched by analysts as a barometer of its health -- were significantly higher than industry experts expected .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.3113, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.5056, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.0358, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 8.8991, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.4903, 10.6061, 10.5067, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.0476, 12.1492, 12.0611, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.5069, 13.4263, 13.5179, 13.4380, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.6429, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.9221, 14.0106, 13.9332, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: NEC is pitching its wireless gear and management software to a variety of industries , including health care and hospitality .\nSentence 2: NEC 's pitching its wireless gear and management software to a variety of industries , including healthcare and hospitality , a company spokesman said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "106", + "# Tokens in Greenlist": "18", + "Fraction of T in Greenlist": "17.0%", + "z-score": "-1.91", + "p value": "0.972", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.0656,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.0196, -2.0692, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.0680, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.0267, 8.9448, 8.8636, 8.7831, 8.8978, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.2232, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.9642, 9.8918, 9.9944, 9.9224,\n 10.0245, 9.9531, 9.8821, 9.9837, 10.0848, 10.0143, 10.1149, 10.2151,\n 10.3148, 10.2447, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Elena Slough , considered to be the nation 's oldest person and the third oldest person in the world , died early Sunday morning .\nSentence 2: ELENA Slough , considered to be the oldest person in the US and the third oldest person in the world , has died .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.1584, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 7.9489, 7.8174, 7.9704, 7.8416, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.4088, 9.2999, 9.4281,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.5769, 10.6894, 10.5955, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 11.8870, 11.8010,\n 11.7157, 11.8172, 11.7326, 11.6487, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 12.9099, 12.8313, 12.7532, 12.8464, 12.7688,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.8928, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.1966, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" We are declaring war on sexual harassment and sexual assault .\nSentence 2: \" We have declared war on sexual assault and sexual harassment , \" Rosa said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "29.3%", + "z-score": "0.963", + "p value": "0.168", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 0.9631])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 9.9817, 9.8792, 10.0000, 9.8987, 10.0188, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.1627, 10.2743, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.4300, 11.5261, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The technology-laced Nasdaq Composite Index < .IXIC > added 1.92 points , or 0.12 percent , at 1,647.94 .\nSentence 2: The technology-laced Nasdaq Composite Index .IXIC dipped 0.08 of a point to 1,646 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.9%", + "z-score": "-1.32", + "p value": "0.907", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -0.9949, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.2423, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.5159, -1.3859, -1.4241, -1.4621, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.3197])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.2952, 8.4678, 8.6373, 8.8039, 8.9677, 9.1287,\n 9.2872, 9.4432, 9.5969, 9.7483, 9.8976, 10.0448, 10.1900, 10.3333,\n 10.1449, 10.2879, 10.4290, 10.5685, 10.7062, 10.5280, 10.6654, 10.8012,\n 10.9355, 11.0682, 11.1995, 11.3294, 11.4579, 11.2918, 11.1291, 11.2583,\n 11.0998, 11.2286, 11.0739, 11.2022, 11.0513, 11.1791, 11.3056, 11.1588,\n 11.2848, 11.1412, 11.2667, 11.1261, 11.2510, 11.1132, 11.2376, 11.3608,\n 11.2263, 11.3489, 11.2169, 11.0870, 11.2094, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.6743, 11.5525, 11.6693, 11.5494,\n 11.4311, 11.3143, 11.1990, 11.0851, 10.9727, 11.0902, 10.9794, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.0047, 11.1197, 11.0147, 10.9109,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.7222, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 11.9187, 12.0218, 12.1244,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 12.9952, 12.9116, 13.0067,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.4263, 13.5179, 13.6091, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 13.9897, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The dollar was at 117.85 yen against the Japanese currency , up 0.1 percent .\nSentence 2: Against the Swiss franc the dollar was at 1.3289 francs , up 0.5 percent on the day .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.7021, -1.5667,\n -1.6057, -1.4713, -1.5104, -1.5492, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.0483, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.4878, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.1455, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.0711, 9.2055, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 11.9594, 12.0611, 11.9737, 12.0749, 11.9883,\n 11.9024, 11.8172, 11.9181, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.3455, 12.4430, 12.3603, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 12.9621, 13.0558,\n 12.9755, 13.0688, 12.9891, 12.9099, 12.8313, 12.9244, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.3967, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: El Watan , an Algerian newspaper , reported that the kidnappers fiercely resisted the army assault this morning , firing Kalashnikov rifles .\nSentence 2: El Watan , an Algerian newspaper , reported that the kidnappers put up fierce resistance during the army assault , firing Kalashnikov rifles .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "24.0%", + "z-score": "-0.333", + "p value": "0.631", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, 0.1063, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.8813, 12.9875, 12.8749,\n 12.9807, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 14.1677, 14.0649, 13.9630, 13.8621,\n 13.7621, 13.8595, 13.7606, 13.8578, 13.7599, 13.6629, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.4780, 13.3854, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.3059, 13.2166, 13.1279, 13.0400, 13.1364, 13.0493, 12.9628,\n 12.8769, 12.9732, 13.0690, 12.9840, 12.8997, 12.8160, 12.7329, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.0030, 12.9244, 12.8464, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.7248, 12.6494, 12.5745, 12.5000,\n 12.4260, 12.3525, 12.2794, 12.2068, 12.1347, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: But Mitsubishi Tokyo Financial ( JP : 8306 : news , chart , profile ) declined 3,000 yen , or 0.65 percent , to 456,000 yen .\nSentence 2: Sumitomo Mitsui Financial ( JP : 8316 : news , chart , profile ) was down 2.5 percent at 198,000 yen .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, 0.1601, 0.3189, 0.2646, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.2054, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 9.9433, 9.8187, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 10.8995, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.3842,\n 11.4945, 11.3950, 11.2966, 11.4065, 11.3091, 11.2127, 11.1172, 11.0227,\n 11.1324, 11.0389, 10.9462, 10.8544, 10.7635, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 10.9048, 10.8204, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.4596, 10.5642, 10.4852, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.1933, 11.1173, 11.0418, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.5033, 11.5993, 11.5261, 11.4533, 11.5489, 11.4766, 11.4047, 11.3333,\n 11.2624, 11.3577, 11.2872, 11.2171, 11.1475, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" We 're just dealing with bragging rights here , who wins and who loses . \"\nSentence 2: \" Leaving aside attorney fees , we 're dealing with bragging rights of who wins and who loses , \" said Gammerman .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "12", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.2549, 5.1241, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 7.7937, 7.6995,\n 7.6064, 7.5143, 7.4233, 7.3333, 7.2443, 7.3765, 7.5076, 7.4194,\n 7.3322, 7.2459, 7.1605, 7.2904, 7.4193, 7.5472, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.0786, 9.1905, 9.1119, 9.2232, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.5381, 9.6456, 9.5695, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.7380, 9.6635, 9.5896, 9.5161, 9.4432,\n 9.3708, 9.2990, 9.2276, 9.3328, 9.4375, 9.5416, 9.6452, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.7735, 9.8746,\n 9.8054, 9.7367, 9.8373, 9.9374, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Shares of Hartford rose $ 2.88 to $ 46.50 in New York Stock Exchange composite trading .\nSentence 2: Shares of Hartford were up $ 2.28 , or 5.2 percent , to $ 45.90 in midday trading .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "186", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "22.0%", + "z-score": "-0.931", + "p value": "0.824", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 13.9531, 14.0489,\n 14.1442, 14.2390, 14.3333, 14.4272, 14.5206, 14.6135, 14.7060, 14.7981,\n 14.8896, 14.9808, 15.0715, 15.1618, 15.2517, 15.3411, 15.4302, 15.5188,\n 15.6070, 15.6949, 15.7823, 15.8694, 15.9561, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.4684, 16.5525, 16.6363, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 17.0411, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.6058, 17.6852, 17.7643, 17.8432, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: This Palm OS smart phone is the last product the company will release before it becomes a part of palmOne .\nSentence 2: This was almost certainly its last full quarter before the company becomes a part of Palm .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "25.7%", + "z-score": "0.187", + "p value": "0.426", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.4436, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: And they think the protein probably is involved in the spread of other forms of cancer .\nSentence 2: They researchers say the research could be relevant to other forms of cancer .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "68.0%", + "z-score": "9.79", + "p value": "6.23e-23", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Tokyo Electric Power Co . , Asia 's largest power company , won approval to restart the first of 17 nuclear reactors it shut down after it admitted falsifying inspection reports .\nSentence 2: Tokyo Electric Power Co . , Asia 's largest power company , restarted the first of 17 nuclear reactors it shut down after admitting it falsified inspection reports .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.5275, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.0461, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Tuition at four-year private colleges averaged $ 19,710 this year , up 6 percent from 2002 .\nSentence 2: For the current academic year , tuition at public colleges averaged $ 4,694 , up almost $ 600 from the year before .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.7039, -1.5752, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.6770, 8.5896, 8.7104, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.9285, 9.0453, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.4124, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.7224, 9.6456, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.7380, 9.8433, 9.9481, 9.8736, 9.9778,\n 10.0814, 10.0074, 10.1106, 10.0371, 9.9642, 10.0668, 10.1690, 10.0965,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.6944, 10.6256, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Security lights have also been installed and police have swept the grounds for booby traps .\nSentence 2: Security lights have also been installed on a barn near the front gate .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.41", + "p value": "0.921", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.2667, -1.0596, -1.1185, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -1.9673, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -1.8251, -1.8676, -1.9098, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.6231, -1.4792, -1.5206, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.0000, 4.8008, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 7.7937, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.4185, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.3827, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.5725, 10.5001, 10.5998, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He said that with the U.S.-backed peace plan , or road map , \u201c in a coma , \u201d the attack could easily widen conflict through the region .\nSentence 2: Mr Jouejati said that with the US-backed road map \" in a coma \" the attack could easily widen through the region .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "71", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-0.754", + "p value": "0.774", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426, 4.0415, 4.3027, 4.5556,\n 4.3644, 4.6101, 4.4272, 4.6663, 4.4907, 4.3217, 4.1586, 4.3916, 4.2339,\n 4.4610, 4.3083, 4.1603, 4.0166, 3.8772, 3.7417, 3.9620, 4.1779, 4.3894,\n 4.5968, 4.8003, 4.6667, 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.7002, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281,\n 4.6188, 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222, 5.3245,\n 5.2281, 5.3867, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622, 5.5691, 5.7207,\n 5.6286, 5.5377, 5.6875, 5.8358, 5.9827, 6.1283, 6.2725, 6.4153, 6.3248,\n 6.4663, 6.3768, 6.2883, 6.4283, 6.5672, 6.7049, 6.8414, 6.9768, 7.1111,\n 7.0231, 6.9361, 7.0692, 6.9830, 7.1149, 7.0296, 7.1605, 7.2904, 7.2058,\n 7.3346, 7.4625, 7.5895, 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.7152,\n 7.8384, 7.9608, 7.8791, 8.0006, 7.9196, 8.0403, 7.9600, 7.8803, 7.8014,\n 7.9212, 7.8429, 7.9619, 7.8842, 7.8072, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.0139, 8.1282, 8.0546, 8.1683,\n 8.2813, 8.2082, 8.3205, 8.2479, 8.1758, 8.1043, 8.2158, 8.1448, 8.2557,\n 8.1851, 8.2954, 8.2252, 8.1556, 8.2652, 8.3742, 8.3050, 8.4133, 8.3446,\n 8.2762, 8.3840, 8.4911, 8.5978, 8.7039, 8.6359, 8.5683, 8.6738, 8.6066,\n 8.5399, 8.6448, 8.7492, 8.6828, 8.7867, 8.8900, 8.9929, 9.0952, 9.1971,\n 9.2986, 9.2324, 9.1667, 9.2676, 9.2022, 9.3026, 9.4026, 9.3375, 9.4370,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: A successful attack could be launched within any type of document that supports VBA , including Microsoft Word , Excel or PowerPoint .\nSentence 2: But this could happen with any document format that supports VBA , including Word , Excel or PowerPoint .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.8340, 0.9629, 0.9165, 1.0445,\n 0.9981, 1.1251, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.1461, 1.2700, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Officials at Brandeis said this was an \" extremely heartrending \" time for the campus .\nSentence 2: \" This is an extremely heartrending time for the entire Brandeis University community .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" If that ain 't a Democrat , I must be at the wrong meeting , \" he said .\nSentence 2: And if that ain 't a Democrat , then I must be in the wrong meeting , \" he said to thunderous applause from his supporters .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "1.5%", + "z-score": "-7.65", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.4157, -3.4641, -3.5119, -3.5590, -3.6056, -3.6515,\n -3.6968, -3.7417, -3.7859, -3.8297, -3.8730, -3.9158, -3.9581, -4.0000,\n -4.0415, -4.0825, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.3301,\n -4.3683, -4.4061, -4.4437, -4.4809, -4.5178, -4.5544, -4.5908, -4.6268,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.6444, -5.6737,\n -5.7028, -5.7318, -5.7607, -5.7894, -5.8180, -5.8464, -5.8747, -5.9029,\n -5.9310, -5.9589, -5.9867, -6.0143, -6.0419, -6.0693, -6.0966, -6.1237,\n -6.1508, -6.1777, -6.2045, -6.2312, -6.2578, -6.2843, -6.3107, -6.3369,\n -6.3631, -6.3892, -6.4151, -6.4409, -6.4667, -6.4923, -6.5179, -6.5433,\n -6.5686, -6.5939, -6.6190, -6.6441, -6.6691, -6.6939, -6.7187, -6.7434,\n -6.7680, -6.7925, -6.8170, -6.8413, -6.8656, -6.8897, -6.9138, -6.9378,\n -6.9617, -6.9856, -7.0093, -7.0330, -7.0566, -7.0801, -7.1036, -7.1270,\n -6.9726, -6.9964, -7.0200, -7.0436, -7.0671, -7.0905, -7.1139, -7.1372,\n -7.1604, -7.1835, -7.2066, -7.2296, -7.2525, -7.2753, -7.2981, -7.3208,\n -7.3434, -7.3660, -7.3885, -7.4109, -7.4333, -7.4556, -7.4778, -7.5000,\n -7.5221, -7.5441, -7.5661, -7.5880, -7.6099, -7.6317, -7.6534])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Fewer than a dozen FBI agents were dispatched to secure and analyze evidence .\nSentence 2: Fewer than a dozen FBI agents will be sent to Iraq to secure and analyze evidence of the bombing .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.0250, -2.0889, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.7408,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Those who only had surgery lived an average of 46 months .\nSentence 2: For those who got surgery alone , median survival was 41 months .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "1.5%", + "z-score": "-7.65", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.4157, -3.4641, -3.5119, -3.5590, -3.6056, -3.6515,\n -3.6968, -3.7417, -3.7859, -3.8297, -3.8730, -3.9158, -3.9581, -4.0000,\n -4.0415, -4.0825, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.3301,\n -4.3683, -4.4061, -4.4437, -4.4809, -4.5178, -4.5544, -4.5908, -4.6268,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.6444, -5.6737,\n -5.7028, -5.7318, -5.7607, -5.7894, -5.8180, -5.8464, -5.8747, -5.9029,\n -5.9310, -5.9589, -5.9867, -6.0143, -6.0419, -6.0693, -6.0966, -6.1237,\n -6.1508, -6.1777, -6.2045, -6.2312, -6.2578, -6.2843, -6.3107, -6.3369,\n -6.3631, -6.3892, -6.4151, -6.4409, -6.4667, -6.4923, -6.5179, -6.5433,\n -6.5686, -6.5939, -6.6190, -6.6441, -6.6691, -6.6939, -6.7187, -6.7434,\n -6.7680, -6.7925, -6.8170, -6.8413, -6.8656, -6.8897, -6.9138, -6.9378,\n -6.9617, -6.9856, -7.0093, -7.0330, -7.0566, -7.0801, -7.1036, -7.1270,\n -6.9726, -6.9964, -7.0200, -7.0436, -7.0671, -7.0905, -7.1139, -7.1372,\n -7.1604, -7.1835, -7.2066, -7.2296, -7.2525, -7.2753, -7.2981, -7.3208,\n -7.3434, -7.3660, -7.3885, -7.4109, -7.4333, -7.4556, -7.4778, -7.5000,\n -7.5221, -7.5441, -7.5661, -7.5880, -7.6099, -7.6317, -7.6534])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "75.3%", + "z-score": "16.2", + "p value": "4.38e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 8.9544, 9.0990, 9.2418, 9.3831, 9.2469, 9.3871, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 9.9495, 10.0791, 10.2075, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.3532, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 11.7838,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.3377, 12.4434, 12.5485, 12.6529, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.7622, 12.8645, 12.7660, 12.8679, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.4780, 13.5746, 13.6707, 13.7663, 13.6742,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.8789, 14.9677, 14.8804, 14.9689, 15.0570, 15.1448,\n 15.2321, 15.3191, 15.4057, 15.4919, 15.5778, 15.6634, 15.7485, 15.6631,\n 15.7481, 15.8327, 15.9170, 15.8325, 15.9165, 16.0002, 16.0836, 16.0000,\n 16.0832, 16.1660])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Tonight a spokesman for Russia 's foreign ministry said the ministry may issue a statement on Thursday clarifying Russia 's position on cooperation with Iran 's nuclear-energy efforts .\nSentence 2: Tonight a spokesman for the Russian Foreign Ministry said it might issue a statement on Thursday clarifying Russia 's position on aiding Iran 's nuclear-energy efforts .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: A day earlier , a committee appointed by reformist President Mohammad Khatami called for an independent judicial inquiry into Kazemi 's death .\nSentence 2: A day earlier , a committee appointed by President Mohammad Khatami had called for an independent inquiry into the 54-year-old photojournalist 's death .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.4963, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -1.8352, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.8813, 12.9875, 12.8749,\n 12.9807, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 14.1677, 14.0649, 13.9630, 13.8621,\n 13.7621, 13.8595, 13.7606, 13.8578, 13.7599, 13.6629, 13.5668, 13.6640,\n 13.7606, 13.8567, 13.7619, 13.6679, 13.5746, 13.6707, 13.5784, 13.4868,\n 13.3960, 13.3059, 13.2166, 13.1279, 13.0400, 12.9527, 12.8661, 12.7802,\n 12.6949, 12.6103, 12.5264, 12.6234, 12.5401, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.4065, 12.3263, 12.4223, 12.3428, 12.4384, 12.5336,\n 12.4547, 12.3764, 12.2987, 12.3935, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.0127, 11.9380, 11.8638, 11.7901, 11.7169, 11.6441, 11.5718, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.4765, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The suite comes complete with a word processor , spreadsheet , presentation software and other components , while continuing its tradition of utilizing an XML-based file format .\nSentence 2: The suite includes a word processor , spreadsheet , presentation application ( analogous to PowerPoint ) , and other components -- all built around the XML file format .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.0974, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "61.6%", + "z-score": "7.23", + "p value": "2.41e-13", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557,\n 6.9282, 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Downstream at Mount Vernon , the Skagit River was expected to crest at 36 feet -- 8 feet above flood stage -- tonight , Burke said .\nSentence 2: The Skagit was expected to crest during the night at 38 feet at Mount Vernon , 10 feet above flood stage , the National Weather Service said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "140", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.586", + "p value": "0.279", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.1206, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.1562, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.4407, 0.5855])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.5337, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.3582, 9.4803, 9.3834,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.3616, 11.2789, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.0302, 12.1260, 12.2214, 12.1447, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.2954, 12.3888, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The blue-chip Dow Jones industrial average eased 44 points , or 0.47 percent , to 9,543 , after scoring five consecutive up sessions .\nSentence 2: The Dow Jones industrial average .DJI rose 18.25 points , or 0.19 percent , to 9,586.71 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "22.5%", + "z-score": "-0.73", + "p value": "0.767", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.3163, 12.2397, 12.3342, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.2954, 12.3888, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.53, + "accuracy_with_watermark": 0.44, + "f1_without_watermark": 0.6758620689655173, + "f1_with_watermark": 0.5882352941176471 + } + }, + "validation": { + "results": [ + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .\nSentence 2: \" The foodservice pie business does not fit our long-term growth strategy .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .\nSentence 2: His wife said he was \" 100 percent behind George Bush \" and looked forward to using his years of training in the war .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.6125, -1.6496, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284, 3.1177, 2.9439, 3.2222,\n 3.0551, 3.3235, 3.1623, 3.0072, 3.2660, 3.1156, 3.3665, 3.6108, 3.4641,\n 3.7017, 3.9337, 3.7905, 3.6515, 3.8772, 3.7417, 3.9620, 4.1779, 4.3894,\n 4.5968, 4.8003, 5.0000, 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.0034, 4.8857, 4.7703, 4.9528, 4.8394, 5.0190,\n 4.9075, 4.7980, 4.6904, 4.5847, 4.7610, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.4296, 4.3333, 4.5034, 4.4083, 4.3146, 4.2222, 4.3894, 4.2981,\n 4.4630, 4.6262, 4.5356, 4.6967, 4.6070, 4.7662, 4.9237, 4.8347, 4.7469,\n 4.6603, 4.8154, 4.7296, 4.8830, 4.7980, 4.9497, 4.8655, 5.0156, 5.1643,\n 5.3116, 5.4576, 5.6023, 5.7457, 5.8878, 5.8034, 5.7199, 5.6373, 5.7778,\n 5.9171, 6.0553, 5.9732, 5.8919, 5.8114, 5.9481, 6.0837, 6.0038, 6.1382,\n 6.0590, 5.9806, 5.9029, 5.8260, 5.9589, 5.8825, 6.0143, 5.9386, 6.0693,\n 5.9941, 5.9196, 5.8458, 5.9752, 6.1036, 6.0302, 6.1577, 6.0848, 6.2113,\n 6.1389, 6.2644, 6.3892, 6.3172, 6.2458, 6.3694, 6.2985, 6.4213, 6.5433,\n 6.6645, 6.5939, 6.5238, 6.4543, 6.3853, 6.5054, 6.4368, 6.5561, 6.4880,\n 6.6064, 6.7242, 6.6564, 6.7734, 6.7060, 6.8222, 6.9378, 6.8707, 6.8041,\n 6.7380, 6.8527, 6.7869, 6.9009, 6.8355, 6.9488, 6.8838, 6.9964, 7.1083,\n 7.2197, 7.3305, 7.4407, 7.5503, 7.6594, 7.5944, 7.5297, 7.4655, 7.5738,\n 7.6816, 7.7889, 7.7249, 7.6613, 7.5981, 7.7047, 7.8107, 7.7478, 7.8533,\n 7.7907, 7.7285, 7.6667, 7.6052, 7.7099, 7.6488, 7.7530, 7.6922, 7.7958,\n 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .\nSentence 2: The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.4535, -1.4967, -1.3472,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.0974, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.2179, -1.0890, -1.1279, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "169", + "Fraction of T in Greenlist": "84.9%", + "z-score": "19.5", + "p value": "3.55e-85", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.3271, 7.5144, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.5709, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.6781, 11.8000, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 12.2527, 12.3690, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.9130, 12.7802, 12.8913, 13.0017, 13.1112, 13.2199, 13.3279, 13.4350,\n 13.3070, 13.4139, 13.5200, 13.6255, 13.7302, 13.8342, 13.9376, 13.8138,\n 13.9169, 14.0193, 14.1211, 14.2222, 14.3227, 14.4226, 14.3027, 14.4024,\n 14.5014, 14.5999, 14.6978, 14.7952, 14.8919, 14.7755, 14.8721, 14.9681,\n 15.0636, 15.1585, 15.2530, 15.3469, 15.2337, 15.3275, 15.4207, 15.5134,\n 15.6057, 15.6975, 15.7888, 15.6786, 15.7697, 15.8604, 15.9506, 16.0404,\n 16.1297, 16.2186, 16.1112, 16.1999, 16.2883, 16.3762, 16.4636, 16.5507,\n 16.6374, 16.5325, 16.6190, 16.7052, 16.7909, 16.8763, 16.9613, 17.0459,\n 16.9434, 17.0279, 17.1120, 17.1957, 17.2791, 17.3621, 17.4448, 17.3445,\n 17.4271, 17.5093, 17.5912, 17.6727, 17.7539, 17.8348, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.1203, 18.1994, 18.2782,\n 18.3566, 18.4348, 18.5127, 18.5903, 18.4960, 18.5735, 18.6507, 18.7276,\n 18.8043, 18.8807, 18.9568, 18.8642, 18.9402, 19.0160, 19.0914, 19.1667,\n 19.2416, 19.3163, 19.2254, 19.3000, 19.3744, 19.4485, 19.5223])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The AFL-CIO is waiting until October to decide if it will endorse a candidate .\nSentence 2: The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.6000, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 0.8980, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.4580, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.6081, 1.5519, 1.6958, 1.8385, 1.9803,\n 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 2.0881, 2.0338, 2.1685, 2.1143, 2.0605,\n 2.0071, 2.1401, 2.2723, 2.2188, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.0350, 2.1640, 2.2923, 2.4198, 2.5466, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.3868, 2.5099, 2.4597, 2.5820, 2.5319, 2.4822, 2.4327, 2.5538,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.4778, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.5700, 2.6874, 2.8043, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140, 4.9652, 4.7556, 5.0000,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.6000, 5.8068, 5.6395, 5.8424, 6.0412, 6.2361, 6.0751, 5.9186, 5.7664,\n 5.9588, 5.8108, 5.6667, 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283,\n 6.3058, 6.1721, 6.0413, 5.9132, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919,\n 6.0622, 6.2302, 6.3960, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714, 7.1207,\n 7.0133, 6.9076, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469, 6.5483, 6.6944,\n 6.5970, 6.7416, 6.8849, 7.0268, 6.9305, 7.0711, 6.9759, 6.8819, 6.7890,\n 6.9282, 6.8364, 6.9743, 7.1110, 7.2466, 7.1556, 7.0657, 6.9768, 7.1111,\n 7.0231, 7.1563, 7.0692, 6.9830, 6.8977, 7.0296, 7.1605, 7.0759, 7.2058,\n 7.3346, 7.4625, 7.3786, 7.2956, 7.2134, 7.3402, 7.2587, 7.3845, 7.5094,\n 7.4286, 7.5526, 7.4724, 7.5955, 7.7178, 7.8393, 7.7597, 7.6808, 7.6026,\n 7.5251, 7.4483, 7.3721, 7.2966, 7.4168, 7.5364, 7.6551, 7.5800, 7.5056,\n 7.4317, 7.5495, 7.4762, 7.4034, 7.3312, 7.4482, 7.3765, 7.4927, 7.6082,\n 7.5369, 7.6517, 7.7658, 7.8793, 7.8084, 7.7380, 7.6681, 7.7808, 7.7114,\n 7.8233, 7.9347, 7.8657, 7.9764, 7.9078, 8.0178, 8.1273, 8.2362, 8.1679,\n 8.1001, 8.2084, 8.1410, 8.0741, 8.0076, 7.9415, 8.0490, 8.1560, 8.2624,\n 8.1966, 8.1312, 8.0663, 8.1721, 8.1075, 8.0433, 7.9796, 7.9162, 7.8533,\n 7.9582, 8.0627, 8.0000, 8.1039, 8.2074, 8.3103, 8.2479, 8.1858, 8.1240,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: No dates have been set for the civil or the criminal trial .\nSentence 2: No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.1241, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.1613, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.3422, 9.4685, 9.3641, 9.4896, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.0070, 10.9123, 10.8186,\n 10.9291, 10.8363, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.2178, 12.3167, 12.2325,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.6643, 12.5831, 12.6785, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .\nSentence 2: It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.4938, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.3422, -2.3825, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.6049, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.4736, -2.5099, -2.3734, -2.4099, -2.4461, -2.4822, -2.3473, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853,\n 7.5514, 7.4066, 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: While dioxin levels in the environment were up last year , they have dropped by 75 percent since the 1970s , said Caswell .\nSentence 2: The Institute said dioxin levels in the environment have fallen by as much as 76 percent since the 1970s .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "66.0%", + "z-score": "13.2", + "p value": "5.61e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 11.7907, 11.9062, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.8813, 12.9875, 12.8749,\n 12.9807, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.8804,\n 13.9790, 13.8745, 13.9728, 14.0705, 13.9675, 14.0649, 13.9630, 13.8621,\n 13.7621, 13.6630, 13.5647, 13.6626, 13.5654, 13.6629, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.7638, 13.6707, 13.5784, 13.4868,\n 13.3960, 13.4920, 13.4021, 13.4977, 13.4086, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.6117, 13.5250, 13.4390, 13.3537, 13.2690, 13.3631,\n 13.2791, 13.3728, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.4963, 13.4150, 13.3343, 13.2542, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.2012, 13.1233, 13.2149, 13.3060, 13.3967, 13.4871, 13.4100, 13.3333,\n 13.2572, 13.1815])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: This integrates with Rational PurifyPlus and allows developers to work in supported versions of Java , Visual C # and Visual Basic .NET.\nSentence 2: IBM said the Rational products were also integrated with Rational PurifyPlus , which allows developers to work in Java , Visual C # and VisualBasic .Net.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "180", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "21.7%", + "z-score": "-1.03", + "p value": "0.849", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -0.7746, -0.5108, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.0094, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.0034, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.1471, 6.0474, 6.1996, 6.1012, 6.0041, 5.9084,\n 6.0587, 5.9641, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.6973, 6.6066, 6.5169, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.1111, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.1149, 7.0296, 6.9451, 7.0759, 6.9923, 6.9094, 6.8274, 6.9570,\n 6.8757, 7.0043, 6.9237, 6.8439, 6.9714, 6.8922, 6.8138, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.2363, 7.1590, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.6883, 7.6120, 7.5364, 7.6551, 7.7732, 7.6980,\n 7.6235, 7.5495, 7.4762, 7.5933, 7.5204, 7.4482, 7.3765, 7.4927,\n 7.4215, 7.5369, 7.4662, 7.3960, 7.5106, 7.4409, 7.5548, 7.6681,\n 7.7808, 7.8928, 8.0042, 7.9347, 7.8657, 7.9764, 8.0865, 8.0178,\n 8.1273, 8.0591, 8.1679, 8.2762, 8.3840, 8.4911, 8.5978, 8.7039,\n 8.8094, 8.9145, 8.8464, 8.7788, 8.8832, 8.9872, 9.0906, 9.0233,\n 9.1262, 9.2287, 9.3306, 9.4321, 9.5331, 9.6336, 9.7337, 9.8333,\n 9.7663, 9.6996, 9.7987, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The top rate will go to 4.45 percent for all residents with taxable incomes above $ 500,000 .\nSentence 2: For residents with incomes above $ 500,000 , the income-tax rate will increase to 4.45 percent .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856, 2.1939, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 3.0072, 2.8577, 3.1156, 2.9704, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.0509, 2.9212, 2.7952, 2.6726, 2.5533, 2.4371, 2.6681,\n 2.8943, 2.7791, 3.0000, 3.2167, 3.4293, 3.3147, 3.2026, 3.0929, 3.2998,\n 3.1918, 3.3947, 3.2883, 3.1840, 3.0817, 3.2796, 3.4743, 3.3729, 3.5642,\n 3.4641, 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.5382,\n 3.7166, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730, 3.7849,\n 3.6979, 3.8657, 4.0316, 3.9452, 3.8600, 3.7758, 3.6927, 3.6107, 3.5298,\n 3.6919, 3.8523, 3.7717, 3.9302, 4.0872, 4.2426, 4.1621, 4.0825, 4.0038,\n 4.1569, 4.0788, 4.2303, 4.1528, 4.0762, 4.0004, 4.1497, 4.2977, 4.2222,\n 4.3687, 4.2938, 4.2196, 4.1461, 4.0734, 4.0015, 3.9302, 4.0740, 4.2167,\n 4.3583, 4.4987, 4.6380, 4.5663, 4.4953, 4.6332, 4.5626, 4.6992, 4.6291,\n 4.5596, 4.4907, 4.6258, 4.7599, 4.6912, 4.6232, 4.5557, 4.4888, 4.4224,\n 4.3566, 4.4887, 4.6198, 4.5542, 4.6843, 4.8135, 4.9419, 4.8763, 4.8113,\n 4.7467, 4.8737, 4.8095, 4.9356, 4.8717, 4.8083, 4.7454, 4.8702, 4.9943,\n 4.9316, 5.0548, 4.9923, 4.9303, 4.8687, 4.8076, 4.7469, 4.6867, 4.8083,\n 4.9292, 5.0494, 5.1689, 5.2877, 5.2272, 5.1671, 5.2850, 5.2251, 5.3423,\n 5.2827, 5.2235, 5.1647, 5.2809, 5.3964, 5.3377, 5.2795, 5.2215, 5.1640,\n 5.1068, 5.0499, 5.1642, 5.2778, 5.2211, 5.3340, 5.4464, 5.5582, 5.5015,\n 5.4451, 5.3891, 5.5000, 5.4442, 5.5545, 5.4989, 5.4436, 5.3886, 5.4981,\n 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856, 2.1939, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 3.0072, 2.8577, 3.1156, 2.9704, 3.2205, 3.4641,\n 3.7017, 3.5590, 3.7905, 4.0166, 4.2378, 4.0980, 4.3142, 4.5260, 4.7336,\n 4.5968, 4.4634, 4.3333, 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569,\n 4.7488, 4.9377, 4.8177, 4.7002, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281,\n 4.9075, 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.0680, 4.9666, 4.8667, 4.7683, 4.9346, 5.0990, 5.2615, 5.4222, 5.5811,\n 5.4832, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.6622, 5.8139, 5.9641,\n 5.8707, 6.0193, 6.1664, 6.3122, 6.2197, 6.3640, 6.5069, 6.6486, 6.5569,\n 6.4663, 6.3768, 6.2883, 6.2008, 6.3408, 6.4795, 6.3928, 6.5303, 6.6667,\n 6.8019, 6.7159, 6.6308, 6.5465, 6.6804, 6.5970, 6.7298, 6.6471, 6.7788,\n 6.6968, 6.6157, 6.7462, 6.8757, 6.7952, 6.9237, 7.0513, 6.9714, 6.8922,\n 6.8138, 6.7361, 6.6591, 6.7854, 6.9107, 7.0353, 7.1590, 7.2818, 7.2051,\n 7.1291, 7.2510, 7.1755, 7.2966, 7.2217, 7.3419, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.8988, 8.0139, 8.1282, 8.0546, 7.9816,\n 7.9091, 7.8372, 7.7658, 7.8793, 7.9921, 7.9211, 8.0333, 8.1448, 8.2557,\n 8.1851, 8.1150, 8.0455, 8.1556, 8.0865, 8.1960, 8.1273, 8.2362, 8.1679,\n 8.1001, 8.2084, 8.3161, 8.2486, 8.3557, 8.4623, 8.3952, 8.3286, 8.2624,\n 8.1966, 8.1312, 8.2370, 8.3423, 8.4471, 8.5513, 8.6551, 8.5899, 8.5252,\n 8.6284, 8.5640, 8.6667, 8.6026, 8.7048, 8.8065, 8.9077, 8.8439, 8.9446,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The results appear in the January issue of Cancer , an American Cancer Society journal , being published online today .\nSentence 2: The results appear in the January issue of Cancer , an American Cancer Society ( news - web sites ) journal , being published online Monday .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.6367, 0.7620, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The delegates said raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations , bank accounts of terror-related organizations and money transfers .\nSentence 2: Bin Laden \u2019 s men pointed out that raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations , bank accounts of terror-related organizations and money transfers .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.0974, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.9620, 4.1779, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.2615, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 5.8139, 5.7207, 5.8707, 6.0193, 5.9270, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.9488, 5.8606, 6.0044, 5.9172, 6.0596, 5.9732, 6.1143,\n 6.0288, 5.9442, 5.8605, 6.0000, 5.9171, 6.0553, 5.9732, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.8757, 7.0043, 7.1319, 7.2587, 7.3845, 7.3037, 7.2236, 7.1443,\n 7.2691, 7.3930, 7.3143, 7.4373, 7.3592, 7.2818, 7.4039, 7.5251,\n 7.6456, 7.5687, 7.6883, 7.8072, 7.9253, 8.0427, 8.1594, 8.2754,\n 8.3906, 8.5052, 8.6190, 8.5424, 8.4664, 8.3910, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.5792, 8.5054, 8.6165, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.5808, 9.5089, 9.6130, 9.7167, 9.6452, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.5286, 10.6256, 10.7222, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Sanitation is poor ... there could be typhoid and cholera , \" he said .\nSentence 2: \" Sanitation is poor , drinking water is generally left behind . . . there could be typhoid and cholera . \"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -1.9127, -1.7111, -1.7638, -1.5656, -1.3697, -1.1761, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -0.9578, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.6939, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.7641, -0.6274, -0.6702, -0.5345,\n -0.3997, -0.2657, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.1684, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 7.9079, 8.0370, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.1481, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.8304, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.0773, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.3893, 10.3154, 10.4170, 10.5181, 10.4447,\n 10.3717, 10.2993, 10.3999, 10.5001, 10.4281, 10.5278, 10.6271, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.7910, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The broader Standard & Poor 's 500 Index .SPX gave up 11.91 points , or 1.19 percent , at 986.60 .\nSentence 2: The technology-laced Nasdaq Composite Index was down 25.36 points , or 1.53 percent , at 1,628.26 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.5459, -0.5864, -0.6266, -0.5000,\n -0.5403, -0.4145, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The only announced Republican to replace Davis is Rep. Darrell Issa of Vista , who has spent $ 1.71 million of his own money to force a recall .\nSentence 2: So far the only declared major party candidate is Rep. Darrell Issa , a Republican who has spent $ 1.5 million of his own money to fund the recall .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.9962, 4.8712, 4.7488, 4.9377,\n 4.8177, 4.7002, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.6904, 4.5847, 4.7610, 4.9348, 4.8305, 4.7278, 4.8990,\n 5.0680, 4.9666, 4.8667, 5.0332, 4.9346, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.7955, 5.9491, 5.8522, 5.7566, 5.9084,\n 6.0587, 5.9641, 6.1128, 6.2601, 6.1664, 6.3122, 6.4566, 6.3640,\n 6.5069, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 6.7937,\n 6.9303, 6.8414, 6.9768, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.2904, 7.4193, 7.5472, 7.6742, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.5824, 8.5052, 8.6190, 8.5424, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.6903, 8.8008, 8.7270, 8.8369, 8.9461,\n 8.8728, 8.8000, 8.9086, 8.8364, 8.9444, 9.0518, 9.1587, 9.0869,\n 9.1932, 9.1218, 9.0510, 9.1567, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.7800, 9.8821, 9.8116, 9.7415, 9.6719, 9.6028, 9.5341,\n 9.4658, 9.5673, 9.6684, 9.6005, 9.5331, 9.6336, 9.7337, 9.8333,\n 9.7663, 9.8654, 9.7987, 9.8974, 9.8311, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The decision to issue new guidance has been prompted by intelligence passed to Britain by the FBI in a secret briefing in late July .\nSentence 2: Scotland Yard 's decision to issue new guidance has been prompted by new intelligence passed to Britain by the FBI in late July .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Unable to find a home for him , a judge told mental health authorities they needed to find supervised housing and treatment for DeVries somewhere in California .\nSentence 2: The judge had told the state Department of Mental Health to find supervised housing and treatment for DeVries somewhere in California .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.4003, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.8370, 1.7772, 1.9261, 1.8665, 2.0140, 1.9545, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.0682, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 2.1210, 2.2608, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 1.9245,\n 2.0617, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.7970, 1.7454, 1.8773, 1.8257,\n 1.9566, 1.9052, 1.8541, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 2.0873, 2.2140, 2.1634, 2.1131, 2.0631, 2.1884, 2.1385, 2.2630,\n 2.2132, 2.3368, 2.2871, 2.4099, 2.3603, 2.4822, 2.4327, 2.5538,\n 2.5044, 2.6247, 2.5754, 2.5265, 2.6458, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.5700, 2.6874, 2.6393, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 7.9472, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.1590, 9.0582,\n 9.1840, 9.0845, 8.9861, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.7725, 9.6828, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.3445, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.0309, 10.9473, 10.8644,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.2602, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.8176, 11.7401, 11.6632,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.2016, 12.1270, 12.0529, 11.9792, 12.0731, 12.0000,\n 11.9273, 11.8551, 11.9487, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The decision came a year after Whipple ended federal oversight of the district 's racial balance , facilities , budget , and busing .\nSentence 2: The decision came a year after Whipple ended federal oversight of school busing as well as the district 's racial balance , facilities and budget .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.5283, 0.4828, 0.4377, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: In midafternoon trading , the Nasdaq composite index was up 8.34 , or 0.5 percent , to 1,790.47 .\nSentence 2: The Nasdaq Composite Index .IXIC dipped 8.59 points , or 0.48 percent , to 1,773.54 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.5990, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.4747, -0.5164, -0.5579, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Morgan Stanley raised its rating on the beverage maker to \" overweight \" from \" equal-weight \" saying in part that pricing power with its bottlers should improve in 2004 .\nSentence 2: Morgan Stanley raised its rating on the company to \" overweight \" from \" equal-weight , \" saying the beverage maker 's pricing power with bottlers should improve in 2004 .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "136", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.19", + "p value": "0.883", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "202", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "34.7%", + "z-score": "3.17", + "p value": "0.000766", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094, 2.1004, 1.9052,\n 1.7219, 2.0656, 1.8898, 2.2156, 2.0466, 1.8856, 2.1939, 2.0381, 1.8889,\n 1.7457, 2.0370, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 1.8034, 2.0605, 1.9415, 1.8257, 1.7132, 1.6036, 1.4968, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142,\n 1.3234, 1.5430, 1.7589, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275,\n 1.7321, 1.6471, 1.5635, 1.7634, 1.6803, 1.8766, 1.7942, 1.7130, 1.9052,\n 2.0948, 2.2819, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.3238, 2.2453,\n 2.1678, 2.0913, 2.0158, 2.1918, 2.3658, 2.5378, 2.7080, 2.6316, 2.5560,\n 2.7235, 2.6485, 2.8138, 2.7393, 2.6656, 2.5927, 2.7552, 2.6828, 2.8433,\n 2.7713, 2.7001, 2.8583, 2.7875, 2.7175, 2.6481, 2.8039, 2.7349, 2.6667,\n 2.5991, 2.7524, 2.6852, 2.6186, 2.7699, 2.7037, 2.8534, 2.7875, 2.7222,\n 2.6575, 2.5934, 2.5298, 2.6768, 2.6135, 2.5508, 2.4887, 2.4271, 2.5717,\n 2.5103, 2.4495, 2.3891, 2.3293, 2.4717, 2.4121, 2.5532, 2.6933, 2.6336,\n 2.5744, 2.7129, 2.6540, 2.5954, 2.5373, 2.6742, 2.6163, 2.5589, 2.6943,\n 2.6370, 2.7713, 2.7143, 2.6576, 2.7906, 2.9227, 3.0540, 3.1844, 3.1273,\n 3.0706, 3.0143, 2.9584, 3.0872, 3.0315, 2.9761, 2.9212, 2.8666, 2.9938,\n 3.1203, 3.2460, 3.3710, 3.3160, 3.2614, 3.3853, 3.3309, 3.4539, 3.3996,\n 3.3457, 3.2921, 3.4140, 3.3606, 3.4816, 3.4283, 3.3754, 3.4954, 3.4427,\n 3.3902, 3.3381, 3.4570, 3.4050, 3.3534, 3.3020, 3.4198, 3.3686, 3.3177,\n 3.4346, 3.3838, 3.5000, 3.4494, 3.3990, 3.3489, 3.2991, 3.2496, 3.3645,\n 3.3151, 3.2660, 3.2171, 3.1685])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The pound also made progress against the dollar , reached fresh three-year highs at $ 1.6789 .\nSentence 2: The British pound flexed its muscle against the dollar , last up 1 percent at $ 1.6672 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "106", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "32.1%", + "z-score": "1.68", + "p value": "0.0463", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.3213, 1.2577, 1.4222, 1.5852,\n 1.7467, 1.6823])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "67.1%", + "z-score": "8.31", + "p value": "4.72e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990, 5.1962,\n 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.3509, 6.0212, 6.2598,\n 6.4902, 6.1968, 6.4254, 6.6469, 6.8620, 6.5997, 6.8127, 7.0201, 7.2222,\n 6.9830, 6.7543, 6.9570, 7.1550, 6.9402, 6.7338, 6.9310, 7.1241, 6.9282,\n 6.7390, 6.9307, 7.1187, 6.9378, 7.1232, 7.3051, 7.4838, 7.3113, 7.1435,\n 7.3208, 7.4952, 7.3333, 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996,\n 7.8628, 7.7152, 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.9286,\n 7.7942, 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Friday , Stanford ( 47-15 ) blanked the Gamecocks 8-0 .\nSentence 2: Stanford ( 46-15 ) has a team full of such players this season .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.24", + "p value": "0.0124", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.6828, 1.8475, 1.7809, 1.7150, 1.8773, 1.8116,\n 1.7467, 1.9066, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.8682, 2.0197, 1.9582, 1.8974,\n 2.0470, 1.9863, 1.9261, 2.0739, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 2.1268, 2.0682, 2.0101, 1.9524, 1.8953, 2.0373, 2.1783,\n 2.1210, 2.0642, 2.2037, 2.1470, 2.0907, 2.2287, 2.3657, 2.3094,\n 2.2535, 2.1980, 2.1429, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.1938, 2.3262, 2.2723, 2.2188, 2.3500, 2.2966, 2.2436])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.0780,\n 11.1966, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 11.7762, 11.8885, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.5434, 12.4370, 12.5434, 12.6491,\n 12.7542, 12.8586, 12.9624, 13.0656, 13.1681, 13.0644, 13.1665, 13.0639,\n 13.1657, 13.2669, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.6640,\n 13.7621, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.2390, 14.3333, 14.2374, 14.3314, 14.4250, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.7920, 14.8831, 14.7899, 14.8807, 14.9711,\n 15.0610, 15.1505, 15.2397, 15.3284, 15.4167, 15.3254, 15.4135, 15.3230,\n 15.4108, 15.4983, 15.5853, 15.6720, 15.7584, 15.8443, 15.9299, 15.8411,\n 15.9264, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.3441,\n 16.4272, 16.3407, 16.4236, 16.3377, 16.4205, 16.5028, 16.5849, 16.6667,\n 16.7481, 16.8292, 16.9101, 16.8256, 16.9063, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Last month Intel raised its revenue guidance for the quarter to between $ 7.6 billion and $ 7.8 billion .\nSentence 2: At the end of the second quarter , Intel initially predicted sales of between $ 6.9 billion and $ 7.5 billion .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.4576, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.3819, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.5620, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.0177, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.2607, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The driver , Eugene Rogers , helped to remove children from the bus , Wood said .\nSentence 2: At the accident scene , the driver was \" covered in blood \" but helped to remove children , Wood said .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "16.8%", + "z-score": "-2.12", + "p value": "0.983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.0665, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: ONG KONG , July 9 Tens of thousands of demonstrators gathered tonight before the legislature building here to call for free elections and the resignation of Hong Kong 's leader .\nSentence 2: Tens of thousands of demonstrators gathered yesterday evening to stand before this city 's legislature building and call for free elections and the resignation of Hong Kong 's leader .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.1213,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -1.9298, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.1880, -2.0349, -2.0761, -1.9245,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.3835,\n -2.4195, -2.4553, -2.3221, -2.3580, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Saddam loyalists have been blamed for sabotaging the nation 's infrastructure , as well as frequent attacks on U.S. soldiers .\nSentence 2: Hussein loyalists have been blamed for sabotaging the nation 's infrastructure and attacking US soldiers .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "7.5%", + "z-score": "-5.69", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -4.1429, -4.1763, -4.2096, -4.2426, -4.2756, -4.3083,\n -4.3409, -4.3733, -4.4055, -4.4376, -4.4695, -4.5013, -4.5329, -4.5644,\n -4.5957, -4.6268, -4.6578, -4.6887, -4.7194, -4.7500, -4.7804, -4.8107,\n -4.8409, -4.8709, -4.9008, -4.9305, -4.9601, -4.9896, -5.0190, -5.0483,\n -5.0774, -5.1064, -5.1352, -5.1640, -5.1926, -5.2211, -5.2495, -5.2778,\n -5.3060, -5.3340, -5.3619, -5.3898, -5.4175, -5.4451, -5.4726, -5.5000,\n -5.5273, -5.5545, -5.5816, -5.6085, -5.6354, -5.6622, -5.6889])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142, 1.2702, 1.5852, 1.4444,\n 1.7457, 1.6082, 1.8974, 1.7628, 1.6330, 1.5076, 1.3862, 1.2687, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.4606, 1.3525, 1.6036, 1.8489, 2.0889, 1.9795,\n 1.8728, 1.7685, 1.6667, 1.5671, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.3198, 1.2366,\n 1.4434, 1.6471, 1.5635, 1.7634, 1.6803, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 2.0135, 1.9333, 2.1193, 2.3028, 2.2226, 2.1436, 2.0656, 1.9887,\n 1.9127, 1.8378, 1.7638, 1.6908, 1.8677, 1.7951, 1.7233, 1.8972, 1.8257,\n 1.9973, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856, 1.8173, 1.7496, 1.9149,\n 1.8475, 1.7809, 1.7150, 1.8773, 1.8116, 1.9720, 2.1309, 2.2884, 2.2222,\n 2.1567, 2.0918, 2.0276, 1.9640, 1.9009, 1.8385, 1.7767, 1.9298, 1.8682,\n 1.8071, 1.9582, 1.8974, 2.0470, 1.9863, 2.1344, 2.0739, 2.0140, 1.9545,\n 1.8956, 1.8371, 1.9825, 1.9242, 2.0682, 2.2111, 2.1527, 2.2943, 2.4348,\n 2.3764, 2.5156, 2.4574, 2.5954, 2.5373, 2.4797, 2.4225, 2.3657, 2.5019,\n 2.4453, 2.5802, 2.7143, 2.6576, 2.6014, 2.5456, 2.4902, 2.4351, 2.3805,\n 2.3262, 2.2723, 2.4037, 2.3500, 2.2966, 2.4267, 2.3735, 2.5026, 2.4495,\n 2.5776, 2.5247, 2.4721, 2.4198, 2.3679, 2.3163, 2.4426, 2.3912, 2.3400,\n 2.2892, 2.4142, 2.3635, 2.4877, 2.6112, 2.7340, 2.6830, 2.6323, 2.5820,\n 2.5319, 2.4822, 2.4327, 2.3835, 2.3346, 2.4553, 2.4065, 2.3580, 2.4778,\n 2.4294, 2.5483, 2.5000, 2.6182, 2.5700, 2.5220, 2.4744, 2.4269, 2.3798,\n 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Its closest living relatives are a family frogs called sooglossidae that are found only in the Seychelles in the Indian Ocean .\nSentence 2: Its closest relative is found in the Seychelles Archipelago , near Madagascar in the Indian Ocean .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 6.9282,\n 7.0895, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.1380, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.4450, 11.3616, 11.2789, 11.3812, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 12.1329, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.5495, 12.6439, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 12.9691, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.1966, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Cooley said he expects Muhammad will similarly be called as a witness at a pretrial hearing for Malvo .\nSentence 2: Lee Boyd Malvo will be called as a witness Wednesday in a pretrial hearing for fellow sniper suspect John Allen Muhammad .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.7%", + "z-score": "-0.0821", + "p value": "0.533", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -0.8489, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.6963,\n -0.5642, -0.6058, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.2075, 6.4019, 6.5924, 6.7795, 6.6150, 6.4550,\n 6.6398, 6.8214, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376,\n 7.5056, 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Instead of pursuing the most imminent and real threats - international terrorists , \" Graham said , \" this Bush administration chose to settle old scores . \"\nSentence 2: \" Instead of pursuing the most imminent and real threats - international terrorists - this Bush administration has chosen to settle old scores , \" Graham said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.4631, -1.5323, -1.2632, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.3922, -0.4547, -0.2582,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.4606,\n 1.4105, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.6336, 1.5848, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 1.1547, 1.1106, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641, 3.2206, 2.9938,\n 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284, 2.6558, 2.4910, 2.3333,\n 2.6186, 2.8947, 3.1623, 3.4219, 3.2660, 3.5176, 3.7626, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.7905, 4.0166, 3.8772, 3.7417, 3.6098, 3.4816, 3.3566,\n 3.5753, 3.4528, 3.6667, 3.5466, 3.4293, 3.3147, 3.5228, 3.7273, 3.9284,\n 4.1260, 4.3205, 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281,\n 4.6188, 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.4083, 4.5760, 4.7419, 4.9058, 5.0679,\n 5.2281, 5.3867, 5.2915, 5.4482, 5.6032, 5.7566, 5.6622, 5.5691, 5.4772,\n 5.6286, 5.7785, 5.6875, 5.5976, 5.5088, 5.4212, 5.3345, 5.4822, 5.3964,\n 5.5426, 5.4576, 5.3736, 5.2906, 5.4349, 5.5780, 5.7199, 5.8605, 6.0000,\n 6.1383, 6.0553, 6.1924, 6.3283, 6.4632, 6.3807, 6.2991, 6.2183, 6.3517,\n 6.4842, 6.4040, 6.3246, 6.2459, 6.1680, 6.0908, 6.2217, 6.1451, 6.2750,\n 6.1990, 6.1237, 6.0491, 6.1777, 6.3054, 6.4322, 6.5582, 6.6833, 6.8076,\n 6.7330, 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 6.8799, 7.0007, 7.1207,\n 7.0481, 6.9762, 6.9048, 6.8339, 6.7637, 6.8825, 6.8127, 6.9307, 6.8614,\n 6.7925, 6.7242, 6.8413, 6.9577, 7.0735, 7.1885, 7.3030, 7.4168, 7.3485,\n 7.4616, 7.5740, 7.6859, 7.6179, 7.5504, 7.4833, 7.5944, 7.7048, 7.6381,\n 7.5719, 7.5061, 7.4407, 7.3758, 7.4853, 7.4208, 7.5297, 7.4655, 7.4017,\n 7.3383, 7.4465, 7.5542, 7.6613, 7.7679, 7.8740, 7.9796, 7.9162, 8.0212,\n 8.1258, 8.2298, 8.1667, 8.1039, 8.0416, 8.1449, 8.2479, 8.1858, 8.1240,\n 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He said they lied on a sworn affidavit that requires them to list prior marriages .\nSentence 2: Morgenthau said the women , all U.S. citizens , lied on a sworn affidavit that requires them to list prior marriages .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.0461, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.2234, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.3004, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.6299, 0.5864, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.8682, 0.8248, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The association said 28.2 million DVDs were rented in the week that ended June 15 , compared with 27.3 million VHS cassettes .\nSentence 2: The Video Software Dealers Association said 28.2 million DVDs were rented out last week , compared to 27.3 million VHS cassettes .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "106", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.336", + "p value": "0.632", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.4184, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.9319,\n 12.0345, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.5179, 13.6091, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 13.9113, 14.0000,\n 13.9221, 14.0106, 13.9332, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: With these assets , Funny Cide has a solid chance to become the first Triple Crown winner since Affirmed in 1978 .\nSentence 2: Funny Cide is looking to become horse racing 's first Triple Crown winner in a generation .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Intel was disappointed and assessing its \" options in the event Mr. Hamidi resumes his spamming activity against Intel , \" spokesman Chuck Mulloy said .\nSentence 2: Intel spokesman Chuck Mulloy said the company was disappointed and assessing its \" options in the event Mr. Hamidi resumes his spamming activity against Intel . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "177", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "26.0%", + "z-score": "0.304", + "p value": "0.381", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.9812, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.3038])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868, 3.2206, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426, 4.5033, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.0811, 4.8990, 4.7237, 4.5547, 4.7819, 5.0037,\n 5.2204, 5.0576, 4.8999, 5.1121, 4.9592, 5.1671, 5.3708, 5.5705, 5.7664,\n 5.6183, 5.4740, 5.3333, 5.5261, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140,\n 5.9944, 5.8635, 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.0125, 6.1828,\n 6.3509, 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714, 6.8641,\n 7.0133, 6.9076, 6.8034, 6.7006, 6.8483, 6.7469, 6.8931, 7.0379, 6.9378,\n 6.8391, 6.9824, 6.8849, 7.0268, 6.9305, 7.0711, 6.9759, 6.8819, 6.7890,\n 6.6973, 6.6066, 6.5169, 6.4283, 6.3408, 6.2541, 6.1685, 6.3070, 6.4444,\n 6.3595, 6.2755, 6.1924, 6.3283, 6.4632, 6.3807, 6.5144, 6.6471, 6.5653,\n 6.6968, 6.6157, 6.5354, 6.6658, 6.7952, 6.7155, 6.8439, 6.9714, 7.0980,\n 7.2236, 7.1443, 7.0658, 7.1904, 7.1125, 7.0353, 7.1590, 7.2818, 7.2051,\n 7.3271, 7.2510, 7.3721, 7.2966, 7.4168, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.7407, 7.6667, 7.5933, 7.5204, 7.6368, 7.7524, 7.8673, 7.7949,\n 7.7230, 7.6517, 7.5809, 7.5106, 7.6246, 7.7380, 7.6681, 7.7808, 7.8928,\n 8.0042, 8.1150, 8.0455, 7.9764, 8.0865, 8.0178, 7.9497, 8.0591, 8.1679,\n 8.1001, 8.2084, 8.1410, 8.2486, 8.1817, 8.2887, 8.3952, 8.5012, 8.6066,\n 8.7116, 8.6448, 8.5785, 8.5126, 8.4471, 8.3820, 8.4862, 8.5899, 8.6932,\n 8.6284, 8.5640, 8.6667, 8.7689, 8.8706, 8.9718, 8.9077, 8.8439, 8.9446,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Mr Annan also warned the US should not use the war on terror as an excuse to suppress \" long-cherished freedoms \" .\nSentence 2: Annan warned that the dangers of extremism after September 11 should not be used as an excuse to suppress \" long-cherished \" freedoms .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.4%", + "z-score": "-2.77", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.2151, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -2.9227, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.1334,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.0317, -2.8977, -2.9320, -2.9661, -2.8333,\n -2.7013, -2.7358, -2.7701])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "64.0%", + "z-score": "12.6", + "p value": "7.37e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.7725, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.9625, 9.8753, 9.7890, 9.9015,\n 10.0133, 9.9278, 9.8430, 9.9542, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.9091, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.0793, 11.1807, 11.2816, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.3468, 11.4459, 11.5444, 11.6425, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.9534, 12.0493, 12.1447, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.2767, 12.2016, 12.2954, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.6283])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: At community colleges , tuition will jump to $ 2,800 from $ 2,500 .\nSentence 2: Community college students will see their tuition rise by $ 300 to $ 2,800 or 12 percent .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "12.6%", + "z-score": "-4.05", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.4495, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.7863, -2.8368, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.3128, -3.3566,\n -3.4000, -3.4429, -3.4855, -3.5277, -3.5695, -3.6109, -3.6520, -3.6927,\n -3.7331, -3.7732, -3.5708, -3.6116, -3.6520, -3.4538, -3.4949, -3.5355,\n -3.5759, -3.6159, -3.6556, -3.6950, -3.7342, -3.7730, -3.8115, -3.8497,\n -3.8877, -3.9254, -3.9628, -4.0000, -4.0369, -4.0736, -4.1100, -4.1461,\n -3.9648, -4.0015, -3.8225, -3.8596, -3.8965, -3.9331, -3.9694, -3.7947,\n -3.8315, -3.8680, -3.9043, -3.9404, -3.9763, -4.0119, -4.0473, -4.0825,\n -4.1175, -4.1522, -4.1868, -4.2212, -4.2553, -4.2893, -4.3231, -4.1586,\n -4.1927, -4.0301, -4.0645, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -4.1429, -4.1763, -4.0204, -4.0541, -3.8997, -3.9337,\n -3.9675, -3.8150, -3.8490, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.6856, -3.7196, -3.7534, -3.7870, -3.8205, -3.8538, -3.7082, -3.7417,\n -3.7750, -3.8081, -3.8411, -3.8740, -3.7311, -3.7641, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.8739, -3.9056, -3.9372, -3.9687, -4.0000,\n -4.0312, -4.0622, -4.0931, -4.1239, -3.9900, -4.0210, -4.0518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.5%", + "z-score": "9.27", + "p value": "9.06e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990, 5.1962,\n 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 5.7735, 6.0212, 6.2598,\n 5.9604, 6.1968, 6.4254, 6.1546, 6.3805, 6.1283, 5.8890, 5.6614, 5.4444,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.7155, 5.5277, 5.3468, 5.5626, 5.7735,\n 5.6000, 5.8068, 5.6395, 5.4772, 5.3199, 5.5234, 5.7229, 5.9186, 5.7664,\n 5.6183, 5.8108, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.1283,\n 5.9944, 5.8635, 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.0125, 6.1828,\n 6.3509, 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.0042, 8.1471,\n 8.2885, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 7.8779, 7.7723, 7.6681,\n 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139, 8.1483, 8.0483, 8.1816,\n 8.0829, 7.9853, 7.8889, 8.0212, 8.1524, 8.2825, 8.1873, 8.0931, 8.2222,\n 8.3503, 8.4774, 8.6035, 8.7287, 8.6357, 8.5437, 8.4526, 8.3625, 8.2733,\n 8.3976, 8.3093, 8.4327, 8.5553, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381,\n 8.6581, 8.7773, 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 8.8778, 8.7952,\n 8.7133, 8.8294, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.5796, 8.5041, 8.6166, 8.7284,\n 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 8.8369, 8.9461, 8.8728, 8.8000,\n 8.7278, 8.6560, 8.5848, 8.5141, 8.6226, 8.7305, 8.8379, 8.7676, 8.6978,\n 8.8045, 8.9107, 8.8413, 8.9469, 8.8780, 8.8094, 8.7414, 8.8464, 8.9509,\n 9.0549, 8.9872, 8.9199, 9.0233, 9.1262, 9.2287, 9.3306, 9.4321, 9.3651,\n 9.2986, 9.2324, 9.1667, 9.1013, 9.2022, 9.3026, 9.2376, 9.3375, 9.2729])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He made a point of saying during Tuesdays debate that the Confederate flag was a racist symbol .\nSentence 2: Though Dean made a point of saying during the debate that the Confederate flag is a racist symbol .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.0000, 7.9079, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.0219, 7.9336, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.4423, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.7482, 8.8636, 8.9783, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.5993, 11.5261, 11.6217, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.9273, 11.8551, 11.7833, 11.7120, 11.8056, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Bush wanted \" to see an aircraft landing the same way that the pilots saw an aircraft landing , \" White House press secretary Ari Fleischer said yesterday .\nSentence 2: On Tuesday , before Byrd 's speech , Fleischer said Bush wanted ' ' to see an aircraft landing the same way that the pilots saw an aircraft landing .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.3735, -0.1857, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: On Monday the Palestinian Prime Minister , Mahmoud Abbas , will report to the Palestinian parliament on his Government 's achievements in its first 100 days in office .\nSentence 2: Palestinian Prime Minister Mahmoud Abbas must defend the record of his first 100 days in office before Parliament today as the death toll in the occupied territories continues to rise .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 1.2501, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 2.0517, 1.9829, 1.9149, 2.0785, 2.0107, 1.9437, 2.1049, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.2222, 2.1567, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.2711, 2.4227, 2.5731, 2.5087, 2.4449, 2.5934, 2.5298,\n 2.4669, 2.6135, 2.7591, 2.6961, 2.6336, 2.5717, 2.7153, 2.6536,\n 2.7958, 2.7344, 2.6735, 2.6131, 2.7534, 2.8928, 3.0311, 2.9704,\n 2.9103, 3.0471, 2.9872, 2.9277, 3.0632, 3.1977, 3.1382, 3.0792,\n 3.0206, 3.1536, 3.0952, 3.2271, 3.1690, 3.1113, 3.0540, 3.1844,\n 3.3140, 3.4428, 3.3853, 3.3282, 3.4558, 3.3989, 3.3424, 3.4689,\n 3.5946, 3.5382, 3.4821, 3.4263, 3.5508, 3.4953, 3.6188, 3.5635,\n 3.5085, 3.4539, 3.5762, 3.6979, 3.8189, 3.7641, 3.7097, 3.8297,\n 3.7755, 3.7216, 3.8406, 3.9590, 3.9052, 3.8516, 3.7984, 3.9158,\n 3.8627, 3.9793, 3.9265, 3.8739, 3.8216, 3.9372, 4.0522, 4.1667,\n 4.1143, 4.0622, 4.1758, 4.1239, 4.0723, 4.1851, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140, 4.9652, 4.7556, 4.5556,\n 4.8008, 4.6101, 4.8488, 4.6663, 4.8990, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.5234, 5.3708, 5.5705, 5.7664,\n 5.9588, 5.8108, 5.6667, 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.6830, 5.5549, 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828,\n 6.3509, 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.7132, 6.6075,\n 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393, 7.2827, 7.1813,\n 7.3233, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068, 7.2104, 7.1152, 7.2532,\n 7.1591, 7.0662, 6.9743, 7.1110, 7.0201, 7.1556, 7.0657, 7.2001, 7.3333,\n 7.4655, 7.3765, 7.5076, 7.4194, 7.5494, 7.6785, 7.5912, 7.7192, 7.6328,\n 7.7598, 7.6742, 7.8003, 7.7155, 7.6315, 7.5484, 7.6734, 7.5910, 7.7152,\n 7.6335, 7.7567, 7.6758, 7.7981, 7.7178, 7.8393, 7.9600, 8.0798, 8.0002,\n 8.1192, 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.2365, 8.3525, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.1628, 8.2772, 8.2024, 8.1282, 8.0546, 8.1683,\n 8.0952, 8.2082, 8.1356, 8.2479, 8.3595, 8.4706, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.5848, 8.6933, 8.6226, 8.7305, 8.6603, 8.7676, 8.6978,\n 8.6284, 8.5595, 8.6662, 8.5978, 8.5298, 8.4623, 8.5683, 8.5012, 8.6066,\n 8.5399, 8.6448, 8.7492, 8.8531, 8.7867, 8.8900, 8.8240, 8.9268, 9.0292,\n 8.9635, 9.0653, 9.0000, 9.1013, 9.0364, 9.1372, 9.2376, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The civilian unemployment rate improved marginally last month -- slipping to 6.1 percent -- even as companies slashed payrolls by 93,000 .\nSentence 2: The civilian unemployment rate improved marginally last month _ sliding down to 6.1 percent _ as companies slashed payrolls by 93,000 amid continuing mixed signals about the nation 's economic health .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "39", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "15.4%", + "z-score": "-1.39", + "p value": "0.917", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321, 1.5403, 1.9052,\n 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 3.3968, 3.6667,\n 3.9279, 4.1812, 4.4272, 4.6663, 4.4907, 4.7237, 4.5547, 4.3916, 4.2339,\n 4.0814, 4.3083, 4.5301, 4.3818, 4.5985, 4.8107, 4.6664, 4.5260, 4.7336,\n 4.9373, 4.8003, 4.6667, 4.5363, 4.4091, 4.2848, 4.1633, 4.3618, 4.2426,\n 4.1260, 4.0119, 3.9001, 3.7905, 3.9837, 4.1740, 4.3614, 4.2528, 4.4371,\n 4.6188, 4.5115, 4.4061, 4.3026, 4.2008, 4.1008, 4.0024, 3.9056, 3.8103,\n 3.9869, 3.8927, 3.8000, 3.9736, 4.1451, 4.3146, 4.2222, 4.3894, 4.5547,\n 4.4630, 4.3727, 4.2836, 4.1957, 4.1090, 4.2710, 4.4313, 4.5899, 4.7469,\n 4.9023, 5.0562, 5.2086, 5.3594, 5.5088, 5.6569, 5.5690, 5.4822, 5.3964,\n 5.3116, 5.4576, 5.6023, 5.5181, 5.6614, 5.5780, 5.4956, 5.6373, 5.7778,\n 5.9171, 5.8351, 5.7540, 5.6737, 5.8114, 5.9481, 6.0837, 6.0038, 6.1382,\n 6.2716, 6.1923, 6.1137, 6.0359, 5.9589, 6.0908, 6.2217, 6.3517, 6.2750,\n 6.4039, 6.3278, 6.2524, 6.1777, 6.1036, 6.0302, 6.1577, 6.2843, 6.4101,\n 6.3369, 6.4618, 6.5857, 6.5130, 6.4409, 6.3694, 6.2985, 6.2282, 6.3509,\n 6.4728, 6.5939, 6.7143, 6.8339, 6.9529, 7.0711, 7.1886, 7.3054, 7.4215,\n 7.3508, 7.2807, 7.2111, 7.1420, 7.0735, 7.0054, 7.1204, 7.0527, 6.9856,\n 7.0998, 7.2134, 7.3263, 7.2594, 7.3717, 7.4833, 7.4167, 7.3506, 7.2849,\n 7.2197, 7.1549, 7.2656, 7.3758, 7.4853, 7.5944, 7.7028, 7.8107, 7.9181,\n 8.0249, 8.1312, 8.2370, 8.1721, 8.1075, 8.0433, 7.9796, 8.0847, 8.0212,\n 7.9582, 8.0627, 8.1667, 8.1039, 8.0416, 7.9796, 7.9179, 8.0212, 8.1240,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Per-user pricing is $ 29 for Workplace Messaging , $ 89 for Team Collaboration and $ 35 for Collaborative Learning .\nSentence 2: Workplace Messaging is $ 29 , Workplace Team Collaboration is $ 89 , and Collaborative Learning is $ 35 .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, 0.0000, -0.0420, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.9604, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Instead of pursuing the most imminent and real threats \u2013 international terrorism \u2013 this Bush administration chose to settle old scores , \" Mr. Graham said .\nSentence 2: \" Instead of pursuing the most imminent and real threats - international terrorists , \" Graham said , \" this Bush administration chose to settle old scores . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.0949, 9.2424, 9.3881, 9.5321, 9.3834, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.3831, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.3086, 9.4425, 9.5751, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.5021, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.7473, 9.8632, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.0504, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.5897, 13.5131, 13.6025, 13.6914, 13.7801, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: SARS has killed about 800 people and affected more than 8400 since being detected in China in November .\nSentence 2: SARS has killed about 800 people and sickened more than 8,400 worldwide , mostly in Asia .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "119", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "28.6%", + "z-score": "0.9", + "p value": "0.184", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, 0.0000, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.7237, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.2710, 1.4446, 1.3770, 1.3101, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774, 0.4201, 0.8165,\n 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 1.4142, 1.2702, 1.5852, 1.8889,\n 2.1822, 2.0370, 1.8974, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 1.9245,\n 1.8034, 1.6859, 1.9415, 2.1909, 2.0738, 1.9599, 2.2011, 2.0889, 2.3238,\n 2.5538, 2.7791, 2.6667, 2.8868, 3.1027, 3.3147, 3.5228, 3.7273, 3.6141,\n 3.5032, 3.3947, 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.6662, 3.8552,\n 4.0415, 4.2251, 4.1219, 4.0205, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.6715, 4.5760, 4.7419, 4.6476, 4.8113,\n 4.9731, 5.1332, 5.0395, 4.9472, 5.1051, 5.0138, 5.1698, 5.0795, 5.2338,\n 5.3865, 5.5377, 5.4480, 5.3594, 5.5088, 5.4212, 5.5690, 5.4822, 5.6285,\n 5.7735, 5.9172, 5.8310, 5.7457, 5.8878, 5.8034, 5.9442, 5.8605, 6.0000,\n 6.1383, 6.2755, 6.1924, 6.1101, 6.2459, 6.1644, 6.2991, 6.2183, 6.3517,\n 6.4842, 6.6157, 6.5354, 6.4558, 6.5861, 6.5072, 6.6365, 6.5583, 6.6865,\n 6.8138, 6.9402, 6.8624, 6.7854, 6.9107, 6.8343, 6.9587, 6.8828, 7.0063,\n 7.1291, 7.2510, 7.1755, 7.1007, 7.2217, 7.1474, 7.2675, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.4762, 7.4034, 7.5204, 7.4482, 7.5644, 7.4927, 7.6082,\n 7.7230, 7.8372, 7.7658, 7.6950, 7.8084, 7.7380, 7.8507, 7.7808, 7.8928,\n 8.0042, 8.1150, 8.0455, 7.9764, 8.0865, 8.0178, 8.1273, 8.0591, 8.1679,\n 8.2762, 8.3840, 8.3161, 8.2486, 8.3557, 8.2887, 8.3952, 8.3286, 8.4345,\n 8.5399, 8.6448, 8.5785, 8.5126, 8.6169, 8.5513, 8.6551, 8.5899, 8.6932,\n 8.7959, 8.8982, 8.8333, 8.7689, 8.8706, 8.8065, 8.9077, 8.8439, 8.9446,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Authorities had no evidence to suggest the two incidents were connected .\nSentence 2: There was no immediate evidence that the two incidents were connected , police said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -1.9262, -1.9757, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.0548, -2.0997, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.1783,\n -2.2197, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.2381, -2.2780, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.2966, -2.3351, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.3912, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.4099, -2.4461, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.4648, -2.5000,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.2206, 3.5382,\n 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140, 4.5033, 4.3027, 4.1111,\n 4.3644, 4.6101, 4.8488, 4.6663, 4.8990, 5.1257, 5.3468, 5.1723, 5.0037,\n 4.8407, 4.6829, 4.8999, 5.1121, 5.3199, 5.1671, 5.0186, 5.2223, 5.0779,\n 5.2778, 5.4740, 5.6667, 5.8560, 6.0421, 5.9017, 5.7646, 5.9479, 5.8140,\n 5.9944, 6.1721, 6.3472, 6.5196, 6.6896, 6.5591, 6.4312, 6.3058, 6.4738,\n 6.6395, 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296, 7.1207,\n 7.2684, 7.1611, 7.0553, 6.9511, 6.8483, 6.7469, 6.6469, 6.7931, 6.6944,\n 6.5970, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640, 6.2725, 6.4153, 6.3248,\n 6.4663, 6.6066, 6.5169, 6.4283, 6.3408, 6.2541, 6.1685, 6.3070, 6.4444,\n 6.5807, 6.4957, 6.4116, 6.3283, 6.4632, 6.3807, 6.5144, 6.6471, 6.5653,\n 6.6968, 6.6157, 6.7462, 6.8757, 7.0043, 6.9237, 6.8439, 6.9714, 7.0980,\n 7.2236, 7.1443, 7.0658, 7.1904, 7.1125, 7.2363, 7.3592, 7.4813, 7.6026,\n 7.7232, 7.6456, 7.5687, 7.6883, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.0476, 7.9729, 8.0880, 8.2024, 8.3162, 8.2420, 8.1683,\n 8.0952, 8.0227, 8.1356, 8.2479, 8.3595, 8.2874, 8.3984, 8.5088, 8.6186,\n 8.5469, 8.6560, 8.5848, 8.6933, 8.6226, 8.5524, 8.4826, 8.4133, 8.3446,\n 8.2762, 8.2084, 8.3161, 8.2486, 8.1817, 8.1151, 8.0490, 7.9833, 8.0902,\n 8.0249, 8.1312, 8.0663, 8.1721, 8.2773, 8.2127, 8.3173, 8.4215, 8.5252,\n 8.6284, 8.7311, 8.8333, 8.9351, 9.0364, 9.1372, 9.0726, 9.1730, 9.1088,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Jeremy 's a good guy , \" Barber said , adding : \" Jeremy is living the dream life of the New York athlete .\nSentence 2: He also said Shockey is \" living the dream life of a New York athlete .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.1442, -2.1884, -2.2323, -2.0641, -1.8974,\n -1.9420, -1.7772, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.1381,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.7701, -2.8043, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428, 1.2702, 1.1323, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.5185, 0.4082, 0.3015, 0.5941, 0.4880, 0.3849,\n 0.6644, 0.9366, 0.8321, 1.0954, 1.3525, 1.6036, 1.4968, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.3333, 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857,\n 1.0120, 0.9258, 0.8412, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366,\n 1.4434, 1.3606, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.8245, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492, 1.4755,\n 1.4027, 1.3308, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771, 1.4076, 1.3389,\n 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785, 1.1138, 1.0498, 1.2185,\n 1.3856, 1.3213, 1.4863, 1.6498, 1.8116, 1.7467, 1.6823, 1.6186, 1.5556,\n 1.4931, 1.6514, 1.5892, 1.5275, 1.4664, 1.4059, 1.3460, 1.2865, 1.4412,\n 1.3819, 1.3231, 1.4757, 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488,\n 1.6906, 1.8371, 1.7792, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.8385,\n 1.9803, 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985, 1.4470,\n 1.3957, 1.3448, 1.2943, 1.2441, 1.3779, 1.3278, 1.2780, 1.4105, 1.5423,\n 1.4923, 1.6230, 1.7529, 1.8821, 1.8317, 1.7817, 1.7321, 1.6827, 1.6336,\n 1.7609, 1.7119, 1.6632, 1.6148, 1.5667, 1.5189, 1.4713, 1.5967, 1.5492,\n 1.5020, 1.6262, 1.7498, 1.7025, 1.8252, 1.7780, 1.7310, 1.6843, 1.6378,\n 1.7592, 1.7128, 1.8333, 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592,\n 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Ratliff 's daughters , Margaret and Martha Ratliff , were adopted by Peterson after their mother 's death .\nSentence 2: Peterson helped raise Ratliff 's two daughters , Margaret and Martha Ratliff , who supported him throughout the trial .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.8489, 2.0889, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.4163, 2.6098, 2.5205, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.3651, 2.2819, 2.4667, 2.3842, 2.5660, 2.7456, 2.9231, 3.0984,\n 3.0151, 2.9329, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.7080,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.5207, 2.4495, 2.6112, 2.5403, 2.7001, 2.6296, 2.5600, 2.4910,\n 2.6481, 2.8039, 2.7349, 2.8889, 3.0415, 2.9726, 3.1236, 3.2733,\n 3.2044, 3.1363, 3.0688, 3.2163, 3.1492, 3.0827, 3.0168, 3.1623,\n 3.0967, 3.2408, 3.3838, 3.5256, 3.6664, 3.6004, 3.5350, 3.4701,\n 3.4058, 3.3420, 3.2788, 3.4171, 3.3542, 3.2918, 3.2299, 3.3665,\n 3.3049, 3.4403, 3.3789, 3.3181, 3.2577, 3.1977, 3.1382, 3.0792,\n 3.2124, 3.1536, 3.2857, 3.2271, 3.1690, 3.1113, 3.0540, 2.9971,\n 2.9406, 3.0706, 3.0143, 2.9584, 3.0872, 3.0315, 3.1593, 3.1038,\n 3.0486, 2.9938, 3.1203, 3.2460, 3.1912, 3.3160, 3.4401, 3.3853,\n 3.5085, 3.6310, 3.5762, 3.5218, 3.4677, 3.5890, 3.5351, 3.4816,\n 3.4283, 3.5485, 3.4954, 3.6148, 3.7335, 3.8516, 3.9691, 3.9158,\n 3.8627, 3.8100, 3.7576, 3.7055, 3.6537, 3.6021, 3.7180, 3.8333,\n 3.7818, 3.7306, 3.6797, 3.6291, 3.5787, 3.5286, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.1701, 13.2698, 13.1730, 13.2722, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 14.0479, 13.9543, 14.0488,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.6070, 14.6976, 14.7877, 14.8773, 14.9666,\n 14.8779, 14.9669, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.3188,\n 15.4057, 15.3191, 15.4057, 15.4919, 15.5778, 15.6634, 15.7485, 15.8334,\n 15.9179, 16.0020, 15.9170, 16.0009, 15.9165, 16.0002, 16.0836, 16.1667,\n 16.2494, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: After losing as much as 84.56 earlier , the Dow Jones industrial average closed up 22.81 , or 0.2 percent , at 9,340.45 .\nSentence 2: In midday trading , the Dow Jones industrial average lost 68.84 , or 0.7 percent , to 9,248.80 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.5", + "p value": "0.933", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990, 5.1962,\n 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509, 6.0212, 5.7155,\n 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569, 5.8890, 5.6614, 5.4444,\n 5.2372, 5.0389, 4.8488, 4.6663, 4.4907, 4.3217, 4.1586, 4.0012, 4.2339,\n 4.4610, 4.3083, 4.5301, 4.3818, 4.2378, 4.4544, 4.3142, 4.5260, 4.3894,\n 4.2563, 4.1265, 4.0000, 3.8765, 4.0825, 3.9614, 4.1633, 4.0446, 3.9284,\n 3.8146, 3.7033, 3.5942, 3.4873, 3.6831, 3.5777, 3.4743, 3.3729, 3.2733,\n 3.4641, 3.6522, 3.5533, 3.4562, 3.3607, 3.2667, 3.1743, 3.0833, 2.9938,\n 3.1760, 3.3558, 3.2667, 3.4438, 3.3556, 3.2686, 3.4427, 3.3566, 3.5283,\n 3.4429, 3.3587, 3.2757, 3.1937, 3.1129, 3.2806, 3.2004, 3.3659, 3.2863,\n 3.2077, 3.1300, 3.2928, 3.2157, 3.1394, 3.2998, 3.4586, 3.3826, 3.3075,\n 3.2332, 3.1597, 3.0870, 3.0151, 2.9439, 2.8735, 2.8039, 2.7349, 2.6667,\n 2.5991, 2.5322, 2.6852, 2.6186, 2.7699, 2.7037, 2.6381, 2.5731, 2.7222,\n 2.6575, 2.8051, 2.7406, 2.8868, 2.8226, 2.9673, 3.1109, 3.0467, 3.1889,\n 3.3301, 3.2660, 3.4058, 3.3420, 3.2788, 3.2161, 3.1539, 3.0923, 3.2299,\n 3.3665, 3.3049, 3.4403, 3.3789, 3.3181, 3.4521, 3.3915, 3.5245, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.3582, 3.2998, 3.4298, 3.3717, 3.5007,\n 3.6289, 3.5708, 3.6980, 3.8244, 3.7664, 3.8919, 3.8341, 3.7766, 3.7196,\n 3.6629, 3.6067, 3.7306, 3.8538, 3.7975, 3.9198, 3.8638, 3.8081, 3.9294,\n 3.8740, 3.9945, 3.9392, 3.8843, 3.8297, 3.7755, 3.7216, 3.8406, 3.7869,\n 3.9052, 3.8516, 3.9691, 4.0860, 4.0325, 4.1487, 4.2642, 4.2108, 4.3256,\n 4.2723, 4.2193, 4.1667, 4.1143, 4.0622, 4.1758, 4.2889, 4.2369, 4.3492,\n 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Rumsfeld , who has been feuding for two years with Army leadership , passed over nine active-duty four-star generals .\nSentence 2: Rumsfeld has been feuding for a long time with Army leadership , and he passed over nine active-duty four-star generals .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, 0.0436, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Consumers who said jobs are difficult to find jumped from 29.4 to 32.6 , while those claiming work was plentiful slipped from 13 to 12.6 .\nSentence 2: Consumers who said jobs are difficult to find jumped to 32.6 from 29.4 , while those saying work was plentiful slipped to 12.6 from 13 in April .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.6353, 1.8728, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.7811, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.5533, 3.7383, 3.9208, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.2571, 4.4296, 4.3333, 4.2385, 4.1451, 4.3146, 4.2222, 4.1312,\n 4.2981, 4.4630, 4.6262, 4.7875, 4.9472, 5.1051, 5.2614, 5.4160,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.6875, 5.8358, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.5169, 6.6559, 6.7937,\n 6.9303, 6.8414, 6.7536, 6.8889, 6.8019, 6.7159, 6.8500, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.5910, 7.5094, 7.6335, 7.7567,\n 7.8791, 8.0006, 8.1214, 8.2413, 8.3605, 8.2793, 8.1989, 8.3172,\n 8.4348, 8.3550, 8.4718, 8.3927, 8.5088, 8.6241, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.4619, 9.5695, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.2419, 10.1690, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.7527, 10.8505, 10.9480, 10.8770, 10.9740, 10.9034, 11.0000,\n 11.0961, 11.0261, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He has no immediate plans for television advertising , believing it is unnecessary this early .\nSentence 2: A Lieberman aide said there were no immediate plans for television advertising .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 7.8174, 7.9704, 7.8416, 7.9931, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.6459, 8.5448, 8.6747, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.2143, 9.1252, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.0453, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.3688, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.5329, 11.4581, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" I think it happened very quickly , \" Houston Police Department homicide investigator Phil Yochum said of the crime .\nSentence 2: \" I think it happened very quickly , \" said Investigator Phil Yochum of the Houston Police Department 's homicide division .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.3819, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "223", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.76", + "p value": "4.19e-09", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426, 4.0415, 4.3027, 4.5556,\n 4.8008, 5.0389, 5.2705, 5.0811, 4.8990, 4.7237, 4.5547, 4.3916, 4.6188,\n 4.8407, 5.0576, 4.8999, 4.7469, 4.5985, 4.4544, 4.3142, 4.5260, 4.7336,\n 4.9373, 4.8003, 4.6667, 4.5363, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.3970, 4.2844, 4.4721, 4.6571, 4.8394, 4.7281,\n 4.6188, 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.4296, 4.3333, 4.5034, 4.6715, 4.8375, 4.7419, 4.6476, 4.5547,\n 4.4630, 4.3727, 4.5356, 4.6967, 4.8561, 4.7662, 4.6775, 4.5899, 4.5035,\n 4.4182, 4.5747, 4.7296, 4.8830, 4.7980, 4.7140, 4.6311, 4.5491, 4.4680,\n 4.6188, 4.7682, 4.9163, 4.8355, 4.7556, 4.6765, 4.5983, 4.5210, 4.6667,\n 4.8111, 4.9543, 4.8772, 4.8008, 4.7252, 4.6503, 4.5762, 4.7173, 4.8572,\n 4.9960, 4.9221, 4.8488, 4.7763, 4.7044, 4.6332, 4.7700, 4.9058, 5.0406,\n 4.9695, 4.8990, 4.8291, 4.7599, 4.6912, 4.8242, 4.9562, 5.0873, 5.0187,\n 4.9507, 4.8833, 4.8164, 4.7501, 4.8795, 5.0080, 5.1357, 5.0694, 5.0037,\n 4.9385, 4.8737, 4.8095, 4.9356, 5.0609, 5.1854, 5.1213, 5.0576, 4.9943,\n 4.9316, 4.8693, 4.9923, 5.1146, 5.2362, 5.1739, 5.1121, 5.0507, 4.9897,\n 4.9292, 5.0494, 5.1689, 5.2877, 5.2272, 5.1671, 5.1073, 5.0480, 4.9891,\n 5.1066, 5.2235, 5.3398, 5.2809, 5.2223, 5.1642, 5.1064, 5.0489, 5.1640,\n 5.2784, 5.3923, 5.3349, 5.2778, 5.2211, 5.1647, 5.1086, 5.2213, 5.3335,\n 5.4451, 5.3891, 5.3333, 5.2779, 5.2229, 5.1681, 5.2786, 5.3886, 5.4981,\n 5.4433, 5.3889, 5.3347, 5.2809, 5.2273, 5.3358, 5.4437, 5.5512, 5.4976,\n 5.4444, 5.3914, 5.3387, 5.2863, 5.3928, 5.4988, 5.6043, 5.5519, 5.4997,\n 5.4478, 5.3962, 5.3449, 5.4495, 5.5537, 5.6574, 5.7607])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: United issued a statement saying it will \" work professionally and cooperatively with all its unions . \"\nSentence 2: Senior vice president Sara Fields said the airline \" will work professionally and cooperatively with all our unions . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.2678, -2.0665, -2.1167, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.7000, -1.7496, -1.7988, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.6958, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.4335, -1.2857, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.2310, -1.2700, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 10.9769, 11.0883, 10.9917, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 11.9319,\n 12.0345, 11.9455, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.4998, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 12.8285,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.6429, 13.7327, 13.6546, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" But they never climb out of the pot of beer again . \"\nSentence 2: It 's just that they never climb out of the beer again . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "10", + "Fraction of T in Greenlist": "5.0%", + "z-score": "-6.51", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.4495, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.7863, -2.8368, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.3128, -3.3566,\n -3.4000, -3.4429, -3.4855, -3.5277, -3.5695, -3.6109, -3.6520, -3.6927,\n -3.7331, -3.7732, -3.8129, -3.8523, -3.8914, -3.9302, -3.9687, -4.0069,\n -4.0449, -4.0825, -4.1198, -3.9260, -3.9639, -4.0016, -4.0390, -4.0762,\n -4.1131, -4.1497, -4.1861, -4.2222, -4.2581, -4.2938, -4.3292, -4.3644,\n -4.3993, -4.4341, -4.4686, -4.5029, -4.5370, -4.5708, -4.6045, -4.6380,\n -4.6713, -4.7044, -4.7373, -4.7700, -4.8025, -4.8348, -4.8670, -4.8990,\n -4.9308, -4.9624, -4.9939, -5.0252, -5.0563, -5.0873, -5.1181, -5.1488,\n -5.1793, -5.2096, -5.2398, -5.2699, -5.2998, -5.3295, -5.3591, -5.3886,\n -5.2262, -5.2560, -5.2857, -5.3153, -5.3447, -5.3740, -5.4032, -5.4322,\n -5.4611, -5.4899, -5.5185, -5.5470, -5.5754, -5.6036, -5.6318, -5.6598,\n -5.6877, -5.7155, -5.7431, -5.7707, -5.7981, -5.8254, -5.8526, -5.8797,\n -5.9067, -5.9336, -5.9604, -5.9871, -6.0136, -6.0401, -6.0665, -6.0927,\n -6.1189, -6.1449, -6.1709, -6.1968, -6.2225, -6.2482, -6.2738, -6.2993,\n -6.3247, -6.3500, -6.3752, -6.4004, -6.4254, -6.2828, -6.3081, -6.3333,\n -6.3585, -6.3835, -6.4085, -6.4333, -6.4581, -6.4828, -6.5074])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 5.9346, 5.8241, 5.7155,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.0261, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 9.9015,\n 10.0133, 9.9278, 9.8430, 9.9542, 10.0647, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.0342, 9.9524, 9.8712, 9.7908, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.7997, 9.9067, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.0701, 9.9940, 9.9184, 9.8433, 9.9481, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Chief financial officer Andy Bryant has said that hike had a greater affect volume than officials expected .\nSentence 2: Bryant has said that hike had a greater effect on demand than officials expected .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "24.0%", + "z-score": "-0.115", + "p value": "0.546", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 5.7735,\n 5.4611, 5.7155, 5.9604, 6.1968, 5.9214, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 6.7337, 6.5433, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 7.8355, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.7250, 8.8667, 9.0068, 9.1455, 9.2828, 9.1589, 9.0370,\n 8.9169, 9.0536, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.5157, 11.6242, 11.5271, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.2150, 12.1244,\n 12.2263, 12.1366, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.7918, 12.7073, 12.8037, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.3463, 13.2668, 13.3585, 13.2796,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.5897, 13.6789, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Robert Liscouski , the Assistant Secretary of Homeland Security for Infrastructure Protection , will oversee NCSD .\nSentence 2: NCSD 's chief will be Robert Liscouski , the assistant secretary of Homeland Security for Infrastructure Protection .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: HP 's shipments increased 48 percent year-over-year , compared to an increase of 31 percent for Dell .\nSentence 2: HPs shipments increased 48 per cent year-on-year , compared to an increase of 31 per cent for Dell .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.2758, 12.3754, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.8769, 12.9732, 12.8881, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.1957, 13.2895, 13.3829, 13.3002, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.6896, 13.7803, 13.8707, 13.7904,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.2546, 14.1764, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Chirac , who can pardon a law-breaker , refused Humbert 's request last year but kept in close touch with the family .\nSentence 2: Chirac , who has the authority to pardon law-breakers , refused Humbert 's request to be allowed to die last year but kept in close touch with the family .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.0428, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.3443, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.7358, -2.6047, -2.6393, -2.6737, -2.7080, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The charges allege that he was part of the conspiracy to kill and kidnap persons in a foreign country .\nSentence 2: The government now charges that Sattar conspired with Rahman to kill and kidnap individuals in foreign countries .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 4.3409, 4.6268,\n 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140, 4.5033, 4.3027, 4.5556,\n 4.3644, 4.1812, 4.0056, 4.2515, 4.0825, 3.9196, 4.1586, 4.3916, 4.6188,\n 4.4610, 4.3083, 4.1603, 4.0166, 3.8772, 4.0980, 4.3142, 4.1779, 4.0451,\n 3.9158, 4.1265, 4.0000, 3.8765, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284,\n 4.1260, 4.3205, 4.2060, 4.0937, 3.9837, 3.8759, 3.7700, 3.9595, 4.1461,\n 4.0415, 3.9386, 3.8376, 4.0205, 3.9208, 3.8228, 4.0024, 3.9056, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 3.7916, 3.7025, 3.6148, 3.5283,\n 3.6979, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389, 3.8555, 3.7732,\n 3.6919, 3.8523, 4.0112, 3.9302, 3.8503, 3.7712, 3.9276, 4.0825, 4.0038,\n 3.9260, 3.8490, 4.0016, 4.1528, 4.0762, 4.0004, 3.9254, 4.0745, 4.2222,\n 4.1475, 4.0736, 4.0004, 4.1461, 4.2907, 4.2178, 4.1455, 4.0740, 4.2167,\n 4.3583, 4.2870, 4.2164, 4.1464, 4.2862, 4.4249, 4.3552, 4.2861, 4.2176,\n 4.3547, 4.4907, 4.4225, 4.3548, 4.2877, 4.4222, 4.5557, 4.4888, 4.4224,\n 4.3566, 4.4887, 4.6198, 4.5542, 4.4891, 4.4246, 4.5543, 4.6832, 4.6188,\n 4.5549, 4.4915, 4.6190, 4.7458, 4.6825, 4.6198, 4.5575, 4.6829, 4.8076,\n 4.7455, 4.6838, 4.6225, 4.7460, 4.8687, 4.8076, 4.7469, 4.6867, 4.8083,\n 4.9292, 4.8690, 4.8093, 4.7500, 4.8698, 4.9889, 4.9297, 4.8709, 4.8125,\n 4.9305, 5.0479, 4.9896, 4.9317, 4.8742, 4.9906, 5.1064, 5.0489, 4.9918,\n 4.9351, 5.0499, 5.1642, 5.1075, 5.0513, 4.9953, 5.1086, 5.2213, 5.1655,\n 5.1100, 5.0548, 5.1667, 5.2779, 5.2229, 5.1681, 5.1137, 5.2241, 5.3340,\n 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: In the 2002 study , the margin of error ranged from 1.8 to 4.4 percentage points .\nSentence 2: It has a margin of error of plus or minus three to four percentage points .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.2449, 0.1952, 0.1459, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Mr. Clinton 's national security adviser , Sandy Berger , said that the White House wasn 't informed of the FBI activities .\nSentence 2: Clinton \u2019 s national security adviser , Sandy Berger , said in an interview that the White House was not informed of the FBI activities .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.2673, -2.0918, -2.1372, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -1.8371,\n -1.8808, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.5247, -2.5620, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.7181, 9.6011,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.6547, 10.5490, 10.6667, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.6139, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.3809, 12.2868, 12.1936, 12.2963, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.3128, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.6990, 13.6117, 13.5250, 13.6188, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.2939, 14.3828, 14.4714,\n 14.5595, 14.4780, 14.5659, 14.6534, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" This decision is clearly incorrect , \" FTC Chairman Timothy Muris said in a written statement .\nSentence 2: The decision is \" clearly incorrect , \" FTC Chairman Tim Muris said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.9212,\n 2.7952, 3.0290, 2.9055, 2.7852, 3.0123, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.1027, 2.9913, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.0817, 3.2796, 3.1787, 3.0796, 2.9823, 3.1754,\n 3.0793, 3.2691, 3.1741, 3.0806, 2.9887, 3.1743, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 3.1879, 3.1052, 3.2757, 3.1937, 3.1129, 3.0330, 3.2004,\n 3.1211, 3.2863, 3.2077, 3.1300, 3.0533, 3.2157, 3.1394, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.2332, 3.1597, 3.3156, 3.2426, 3.1704,\n 3.0989, 3.2525, 3.1814, 3.3333, 3.2627, 3.1928, 3.1236, 3.2733,\n 3.2044, 3.3526, 3.2841, 3.2163, 3.1492, 3.2953, 3.2285, 3.3731,\n 3.3066, 3.2408, 3.1755, 3.3182, 3.2533, 3.3947, 3.3301, 3.2660,\n 3.2025, 3.3420, 3.2788, 3.4171, 3.3542, 3.2918, 3.2299, 3.3665,\n 3.3049, 3.4403, 3.3789, 3.3181, 3.2577, 3.3915, 3.3314, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.4170, 3.3582, 3.4884, 3.4298, 3.3717,\n 3.3140, 3.4428, 3.3853, 3.5131, 3.4558, 3.3989, 3.3424, 3.4689,\n 3.4126, 3.5382, 3.4821, 3.4263, 3.3710, 3.4953, 3.4401, 3.5635,\n 3.5085, 3.4539, 3.3996, 3.5218, 3.4677, 3.5890, 3.5351, 3.4816,\n 3.4283, 3.5485, 3.4954, 3.6148, 3.5619, 3.5093, 3.4570, 3.5753,\n 3.5232, 3.6407, 3.5887, 3.5370, 3.4857, 3.6021, 3.5509, 3.6667,\n 3.6156, 3.5648, 3.5143, 3.6291, 3.5787, 3.6927, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.7439, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 8.9612, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.1735, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.2232, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.3566, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.5410, 10.4713, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Prosecutors allege that Nichols and co-conspirator Timothy McVeigh worked together to prepare a bomb that destroyed the Alfred P. Murrah Federal Building .\nSentence 2: Prosecutors allege that Nichols and coconspirator Timothy McVeigh worked together to prepare a 4,000-pound fuel-and-fertilizer bomb that destroyed the Murrah building .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.4874, -2.3150, -2.3586, -2.4019, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.1433, -2.9950, -3.0315, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -3.1013, -3.1368, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.1334,\n -3.1679, -3.2023, -3.0639, -3.0984, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.1009, -3.1342, -3.1674, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140, 4.9652, 4.7556, 4.5556,\n 4.8008, 4.6101, 4.8488, 4.6663, 4.8990, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.5234, 5.3708, 5.5705, 5.7664,\n 5.9588, 5.8108, 5.6667, 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.6830, 5.5549, 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828,\n 6.3509, 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.7132, 6.6075,\n 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393, 7.2827, 7.1813,\n 7.3233, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068, 7.2104, 7.1152, 7.2532,\n 7.1591, 7.0662, 6.9743, 7.1110, 7.0201, 7.1556, 7.0657, 7.2001, 7.3333,\n 7.4655, 7.3765, 7.5076, 7.4194, 7.5494, 7.6785, 7.5912, 7.7192, 7.6328,\n 7.7598, 7.6742, 7.8003, 7.7155, 7.6315, 7.5484, 7.6734, 7.5910, 7.7152,\n 7.6335, 7.7567, 7.6758, 7.7981, 7.7178, 7.8393, 7.9600, 8.0798, 8.0002,\n 8.1192, 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.2365, 8.3525, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.1628, 8.2772, 8.2024, 8.1282, 8.0546, 8.1683,\n 8.0952, 8.2082, 8.1356, 8.2479, 8.3595, 8.4706, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.5848, 8.6933, 8.6226, 8.7305, 8.6603, 8.7676, 8.6978,\n 8.6284, 8.5595, 8.6662, 8.5978, 8.5298, 8.4623, 8.5683, 8.5012, 8.6066,\n 8.5399, 8.6448, 8.7492, 8.8531, 8.7867, 8.8900, 8.8240, 8.9268, 9.0292,\n 8.9635, 9.0653, 9.0000, 9.1013, 9.0364, 9.1372, 9.2376, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Terri Schiavo , 39 , is expected to die sometime in the next two weeks in the Tampa-area hospice where she has spent the past several years .\nSentence 2: Terri Schiavo , 39 , underwent the procedure at the Tampa Bay area hospice where she has been living for several years , said her father , Bob Schindler .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.8006, 0.7441, 0.8997, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.9739, 0.9245, 1.0598, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.9742, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.9415, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 1.0000,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "169", + "Fraction of T in Greenlist": "84.9%", + "z-score": "19.5", + "p value": "3.55e-85", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.3271, 7.5144, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.5709, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.6781, 11.8000, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 12.2527, 12.3690, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.9130, 12.7802, 12.8913, 13.0017, 13.1112, 13.2199, 13.3279, 13.4350,\n 13.3070, 13.4139, 13.5200, 13.6255, 13.7302, 13.8342, 13.9376, 13.8138,\n 13.9169, 14.0193, 14.1211, 14.2222, 14.3227, 14.4226, 14.3027, 14.4024,\n 14.5014, 14.5999, 14.6978, 14.7952, 14.8919, 14.7755, 14.8721, 14.9681,\n 15.0636, 15.1585, 15.2530, 15.3469, 15.2337, 15.3275, 15.4207, 15.5134,\n 15.6057, 15.6975, 15.7888, 15.6786, 15.7697, 15.8604, 15.9506, 16.0404,\n 16.1297, 16.2186, 16.1112, 16.1999, 16.2883, 16.3762, 16.4636, 16.5507,\n 16.6374, 16.5325, 16.6190, 16.7052, 16.7909, 16.8763, 16.9613, 17.0459,\n 16.9434, 17.0279, 17.1120, 17.1957, 17.2791, 17.3621, 17.4448, 17.3445,\n 17.4271, 17.5093, 17.5912, 17.6727, 17.7539, 17.8348, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.1203, 18.1994, 18.2782,\n 18.3566, 18.4348, 18.5127, 18.5903, 18.4960, 18.5735, 18.6507, 18.7276,\n 18.8043, 18.8807, 18.9568, 18.8642, 18.9402, 19.0160, 19.0914, 19.1667,\n 19.2416, 19.3163, 19.2254, 19.3000, 19.3744, 19.4485, 19.5223])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Secretary of State Colin Powell designated the Chechen leader believed responsible for last year 's hostage standoff in a Moscow theater as a threat to U.S. security Friday .\nSentence 2: U.S. Secretary of State Colin Powell on Friday designated Chechen rebel leader Shamil Basayev a threat to the security of the United States and to U.S. citizens .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "29.2%", + "z-score": "0.788", + "p value": "0.215", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426, 4.0415, 4.3027, 4.5556,\n 4.3644, 4.6101, 4.4272, 4.6663, 4.4907, 4.3217, 4.1586, 4.3916, 4.2339,\n 4.4610, 4.3083, 4.1603, 4.0166, 3.8772, 3.7417, 3.9620, 4.1779, 4.3894,\n 4.5968, 4.8003, 4.6667, 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.7002, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281,\n 4.6188, 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222, 5.3245,\n 5.2281, 5.3867, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622, 5.5691, 5.7207,\n 5.6286, 5.5377, 5.6875, 5.8358, 5.9827, 6.1283, 6.2725, 6.4153, 6.3248,\n 6.4663, 6.3768, 6.2883, 6.4283, 6.5672, 6.7049, 6.8414, 6.9768, 7.1111,\n 7.0231, 6.9361, 7.0692, 6.9830, 7.1149, 7.0296, 7.1605, 7.2904, 7.2058,\n 7.3346, 7.4625, 7.5895, 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.7152,\n 7.8384, 7.9608, 7.8791, 8.0006, 7.9196, 8.0403, 7.9600, 7.8803, 7.8014,\n 7.9212, 7.8429, 7.9619, 7.8842, 7.8072, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.0139, 8.1282, 8.0546, 8.1683,\n 8.2813, 8.2082, 8.3205, 8.2479, 8.1758, 8.1043, 8.2158, 8.1448, 8.2557,\n 8.1851, 8.2954, 8.2252, 8.1556, 8.2652, 8.3742, 8.3050, 8.4133, 8.3446,\n 8.2762, 8.3840, 8.4911, 8.5978, 8.7039, 8.6359, 8.5683, 8.6738, 8.6066,\n 8.5399, 8.6448, 8.7492, 8.6828, 8.7867, 8.8900, 8.9929, 9.0952, 9.1971,\n 9.2986, 9.2324, 9.1667, 9.2676, 9.2022, 9.3026, 9.4026, 9.3375, 9.4370,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Today , we are trying to convey this problem to Russian President Vladimir Putin and US President George W Bush . \"\nSentence 2: \" Today , we are trying to convey this problem to Russian President Vladimir Putin ( news - web sites ) and President Bush ( news - web sites ) . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -0.7332, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -0.8374, -0.6956, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "169", + "Fraction of T in Greenlist": "84.9%", + "z-score": "19.5", + "p value": "3.55e-85", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.3271, 7.5144, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.5709, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.6781, 11.8000, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 12.2527, 12.3690, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.9130, 12.7802, 12.8913, 13.0017, 13.1112, 13.2199, 13.3279, 13.4350,\n 13.3070, 13.4139, 13.5200, 13.6255, 13.7302, 13.8342, 13.9376, 13.8138,\n 13.9169, 14.0193, 14.1211, 14.2222, 14.3227, 14.4226, 14.3027, 14.4024,\n 14.5014, 14.5999, 14.6978, 14.7952, 14.8919, 14.7755, 14.8721, 14.9681,\n 15.0636, 15.1585, 15.2530, 15.3469, 15.2337, 15.3275, 15.4207, 15.5134,\n 15.6057, 15.6975, 15.7888, 15.6786, 15.7697, 15.8604, 15.9506, 16.0404,\n 16.1297, 16.2186, 16.1112, 16.1999, 16.2883, 16.3762, 16.4636, 16.5507,\n 16.6374, 16.5325, 16.6190, 16.7052, 16.7909, 16.8763, 16.9613, 17.0459,\n 16.9434, 17.0279, 17.1120, 17.1957, 17.2791, 17.3621, 17.4448, 17.3445,\n 17.4271, 17.5093, 17.5912, 17.6727, 17.7539, 17.8348, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.1203, 18.1994, 18.2782,\n 18.3566, 18.4348, 18.5127, 18.5903, 18.4960, 18.5735, 18.6507, 18.7276,\n 18.8043, 18.8807, 18.9568, 18.8642, 18.9402, 19.0160, 19.0914, 19.1667,\n 19.2416, 19.3163, 19.2254, 19.3000, 19.3744, 19.4485, 19.5223])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: That would be a potential setback to Chief Executive Phil Condit 's strategy of bolstering defense-related sales during a slump in jetliner deliveries .\nSentence 2: The inquiry may hinder Chief Executive Phil Condit 's strategy of bolstering defense-related sales during a slump in jetliner deliveries .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.1690, -1.9829, -1.7988, -1.8475, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.2708, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.4938, -1.3620,\n -1.4008, -1.4393, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: PeopleSoft also said its board had officially rejected Oracle 's offer .\nSentence 2: Thursday morning , PeopleSoft 's board rejected the Oracle takeover offer .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -1.9127, -1.9645, -2.0158, -1.8161, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.7468, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.1399, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "68.0%", + "z-score": "9.79", + "p value": "6.23e-23", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The Dow Jones industrial average ended the day down 10.89 at 9,837.94 , after advancing 111.04 Wednesday .\nSentence 2: The Dow Jones industrial average fell 10.89 points , or 0.11 percent , to 9,837.94 .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.2122, -0.2540, -0.1267, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "65.8%", + "z-score": "8.04", + "p value": "4.45e-16", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415, 4.3409, 4.0825,\n 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855, 5.4271, 5.2085, 5.0000,\n 5.2372, 5.4678, 5.6921, 5.4958, 5.7155, 5.5277, 5.3468, 5.5626, 5.7735,\n 5.9797, 6.1815, 6.3791, 6.5727, 6.4019, 6.2361, 6.0751, 6.2668, 6.4550,\n 6.2993, 6.4846, 6.3333, 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.0980, 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467,\n 7.2169, 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 7.8928,\n 8.0413])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Cortisol levels in the saliva of day care children were highest and rose most steeply in those judged by day care center personnel to be the shyest .\nSentence 2: Cortisol levels in the saliva of day-care children were highest and rose most steeply in those whom day-care centre staffed judged to be the shyest .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.5459, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "68", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "47.1%", + "z-score": "4.2", + "p value": "1.33e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495, 2.1170,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.8284, 2.6558, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.4495, 2.3116, 2.1783, 2.4398, 2.6943,\n 2.9424, 3.1844, 3.4207, 3.2863, 3.5165, 3.3853, 3.6098, 3.4816, 3.7009,\n 3.5753, 3.7897, 4.0000, 3.8765, 3.7559, 3.9614, 3.8431, 4.0446, 3.9284,\n 3.8146, 3.7033, 3.9001, 4.0937, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552,\n 3.7528, 3.9386, 3.8376, 4.0205, 4.2008])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" The idea that Tony Abbott is in some way a one-dimensional political head-kicker couldn 't be more wrong , \" Mr Howard said .\nSentence 2: \" The idea that Tony Abbott is in some way a one-dimensional political head kicker couldn 't be more wrong . \"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.7473, 9.8632, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 10.9898, 11.0952, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.8464, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" Biotech products , if anything , may be safer than conventional products because of all the testing , \" Fraley said , adding that 18 countries have adopted biotechnology .\nSentence 2: \" Biotech products , if anything , may be safer than conventional products because of all the testing , \" said Robert Fraley , Monsanto 's executive vice president .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.4560, 9.3386, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.0000, 9.8987, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.3408, 9.2480, 9.1561, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.7610, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.5516, 8.6677, 8.5879, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.1905, 9.3017, 9.2232, 9.1452, 9.0679, 8.9912,\n 8.9151, 8.8396, 8.7647, 8.6903, 8.6165, 8.5433, 8.6537, 8.5810,\n 8.5088, 8.4371, 8.3660, 8.2954, 8.2252, 8.1556, 8.0865, 8.0178,\n 7.9497, 8.0591, 7.9913, 8.1001, 8.2084, 8.3161, 8.2486, 8.1817,\n 8.2887, 8.2221, 8.1560, 8.0902, 8.1966, 8.1312, 8.0663, 8.1721,\n 8.2773, 8.3820, 8.4862, 8.4215, 8.5252, 8.6284, 8.5640, 8.5000,\n 8.4364, 8.3732, 8.3103, 8.2479, 8.1858, 8.1240, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The rate of skin cancer has tripled since the 1950s in Norway and Sweden , according to the study .\nSentence 2: The study also found that skin cancer nearly tripled in Norway and Sweden since the 1950s .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.5238, -0.5695, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.7703, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: We acted because we saw the existing evidence in a new light , through the prism of our experience on 11 September , \" Rumsfeld said .\nSentence 2: Rather , the US acted because the administration saw \" existing evidence in a new light , through the prism of our experience on September 11 \" .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Trans fat makes up only 1 percent to 3 percent of the total fat Americans consume , compared with 14 percent for saturated fat .\nSentence 2: Trans fat accounts for 2.5 percent of Americans ' daily calories , compared to 11 percent to 12 percent for saturated fat .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "13", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.16", + "p value": "0.564", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: However , a recent slide in prices and OPEC 's expectations of a surge in oil inventories have compounded its fears about a further softening of the market .\nSentence 2: A 14 percent slide in crude prices this month and expectations of a build up in oil inventories compounded OPEC 's fears of a further softening of the market .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.0886, -0.1325, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Peterson , 31 , is now charged with murder in the deaths of his 27-year-old wife and their unborn son .\nSentence 2: Peterson , 31 , is charged with two counts of first-degree murder in the slayings of his wife , Laci , and their unborn son , Conner .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.4714,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.5706, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "138", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "65.2%", + "z-score": "10.9", + "p value": "5.12e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.1196, 8.0042,\n 8.1471, 8.0335, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 8.8518, 8.7482, 8.8780, 8.7757, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.3582, 9.2611, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Carlson on Tuesday said he would not recuse himself from the case .\nSentence 2: Service officials said Carlson refused to recuse himself from the case .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "68.0%", + "z-score": "9.79", + "p value": "6.23e-23", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The program will grow to include ports in Dubai , Turkey and Malaysia , among others .\nSentence 2: The program will be expanded to include areas of the Middle East such as Dubai , Turkey and Malaysia , Mr. Ridge said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 8.8192, 8.7045, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.4094, 9.3088, 9.2094, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.2202, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.1810, 11.0952, 11.0102, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.6412, 11.7405, 11.6606, 11.5813,\n 11.5026, 11.4244, 11.3468, 11.2698, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.3608, 11.4581, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: McCabe said he was considered a witness , not a suspect .\nSentence 2: \" He is not considered a suspect , \" McCabe said .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "7.5%", + "z-score": "-5.69", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.4157, -3.4641, -3.5119, -3.5590, -3.6056, -3.6515,\n -3.6968, -3.7417, -3.7859, -3.8297, -3.8730, -3.9158, -3.9581, -4.0000,\n -4.0415, -4.0825, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.3301,\n -4.3683, -4.4061, -4.4437, -4.4809, -4.5178, -4.5544, -4.5908, -4.6268,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.8636, -5.8919,\n -5.9201, -5.9481, -5.9760, -6.0038, -6.0315, -6.0590, -6.0864, -6.1137,\n -6.1409, -6.1680, -6.1949, -6.2217, -6.2484, -6.2750, -6.3015, -6.3278,\n -6.3541, -6.3803, -6.4063, -6.4322, -6.4581, -6.4838, -6.5094, -6.3369,\n -6.3631, -6.3892, -6.4151, -6.2458, -6.2722, -6.2985, -6.1316, -6.1584,\n -6.1851, -6.2116, -6.0476, -6.0746, -6.1015, -6.1283, -5.9670, -5.9941,\n -6.0212, -5.8621, -5.7040, -5.7319, -5.7597, -5.6036, -5.6318, -5.6598,\n -5.6877, -5.5340, -5.5623, -5.5904, -5.4385, -5.4670, -5.4952, -5.5234,\n -5.3738, -5.4023, -5.4306, -5.4588, -5.3113, -5.3398, -5.3682, -5.2223,\n -5.2510, -5.1064, -5.1352, -5.1640, -5.1926, -5.2211, -5.2495, -5.2778,\n -5.3060, -5.3340, -5.3619, -5.3898, -5.4175, -5.4451, -5.4726, -5.5000,\n -5.5273, -5.5545, -5.5816, -5.6085, -5.6354, -5.6622, -5.6889])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.0139, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.3459, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.6066,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.6631, 11.7762, 11.6652, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 11.7881, 11.8982, 12.0077, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.4405, 12.3377, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.1701, 13.0732, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 13.8615,\n 13.7694, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.5199, 14.4321,\n 14.3449, 14.4355, 14.5257, 14.6155, 14.7049, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.1498, 15.0657, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.7545, 15.6736, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The woman was exposed to the SARS virus while in the hospital but was not a health care worker , said Dr. Colin D \u2019 Cunha , Ontario \u2019 s commissioner of public health .\nSentence 2: The woman was exposed to the SARS virus while in the hospital but was not a health-care worker , said Dr Colin D 'Cunha , Ontario 's commissioner of public health .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "140", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.586", + "p value": "0.279", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.4899, -0.5659, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 11.8427, 11.9455, 11.8571, 11.9594, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.6313, 13.7215, 13.8113, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.0214, 14.1091, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He added that those \" are not solely American principles , nor are they exclusively Western . \"\nSentence 2: \" These are not solely American principles nor are they exclusively Western , \" Rumsfeld said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "1.5%", + "z-score": "-7.65", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.4157, -3.4641, -3.5119, -3.5590, -3.6056, -3.6515,\n -3.6968, -3.7417, -3.7859, -3.8297, -3.8730, -3.9158, -3.9581, -4.0000,\n -4.0415, -4.0825, -3.7997, -3.8431, -3.8860, -3.9284, -3.9703, -4.0119,\n -4.0530, -4.0937, -4.1341, -4.1740, -4.2136, -4.2528, -4.2916, -4.3301,\n -4.3683, -4.4061, -4.4437, -4.4809, -4.5178, -4.5544, -4.5908, -4.6268,\n -4.6626, -4.6981, -4.7333, -4.7683, -4.8030, -4.8375, -4.8718, -4.9058,\n -4.9396, -4.9731, -5.0064, -5.0395, -5.0724, -5.1051, -5.1376, -5.1698,\n -5.2019, -5.2338, -5.2655, -5.2970, -5.3283, -5.3594, -5.3904, -5.4212,\n -5.4518, -5.4822, -5.5125, -5.5426, -5.5725, -5.6023, -5.6319, -5.6614,\n -5.6907, -5.7199, -5.7489, -5.7778, -5.8065, -5.8351, -5.6444, -5.6737,\n -5.7028, -5.7318, -5.7607, -5.7894, -5.8180, -5.8464, -5.8747, -5.9029,\n -5.9310, -5.9589, -5.9867, -6.0143, -6.0419, -6.0693, -6.0966, -6.1237,\n -6.1508, -6.1777, -6.2045, -6.2312, -6.2578, -6.2843, -6.3107, -6.3369,\n -6.3631, -6.3892, -6.4151, -6.4409, -6.4667, -6.4923, -6.5179, -6.5433,\n -6.5686, -6.5939, -6.6190, -6.6441, -6.6691, -6.6939, -6.7187, -6.7434,\n -6.7680, -6.7925, -6.8170, -6.8413, -6.8656, -6.8897, -6.9138, -6.9378,\n -6.9617, -6.9856, -7.0093, -7.0330, -7.0566, -7.0801, -7.1036, -7.1270,\n -6.9726, -6.9964, -7.0200, -7.0436, -7.0671, -7.0905, -7.1139, -7.1372,\n -7.1604, -7.1835, -7.2066, -7.2296, -7.2525, -7.2753, -7.2981, -7.3208,\n -7.3434, -7.3660, -7.3885, -7.4109, -7.4333, -7.4556, -7.4778, -7.5000,\n -7.5221, -7.5441, -7.5661, -7.5880, -7.6099, -7.6317, -7.6534])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" If we don 't march into Tehran , I think we will be in pretty good shape , \" he said .\nSentence 2: \" As long as we don 't march on Tehran , I think we are going to be in pretty good shape , \" he said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.0812, -1.1263, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.2521, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: On Saturday , a 149mph serve against Agassi equalled Rusedski 's world record .\nSentence 2: On Saturday , Roddick equalled the world record with a 149 m.p.h. serve in beating Andre Agassi .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.2186, -2.2646,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -2.1172, -1.9545, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.6571, 4.5461, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.1978, 5.0990, 5.0017, 5.1640,\n 5.3245, 5.2281, 5.1332, 5.2915, 5.4482, 5.3541, 5.2614, 5.4160,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.4480, 5.5976, 5.7458, 5.8926,\n 6.0380, 5.9488, 6.0927, 6.0044, 5.9172, 6.0596, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.4444, 6.5807, 6.4957, 6.6308, 6.5465,\n 6.6804, 6.5970, 6.5144, 6.4327, 6.3517, 6.2716, 6.4040, 6.5354,\n 6.4558, 6.5861, 6.7155, 6.8439, 6.7648, 6.6865, 6.8138, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.2363, 7.1590, 7.2818, 7.4039, 7.5251,\n 7.4483, 7.3721, 7.4924, 7.6120, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.0139, 8.1282, 8.2420,\n 8.1683, 8.2813, 8.3937, 8.5054, 8.4322, 8.3595, 8.4706, 8.5810,\n 8.5088, 8.6186, 8.7278, 8.8364, 8.7646, 8.6933, 8.8013, 8.9087,\n 8.8379, 8.9447, 9.0510, 9.1567, 9.0863, 9.1915, 9.2961, 9.4002,\n 9.3302, 9.4338, 9.5369, 9.6394, 9.5698, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.2650, 10.3628, 10.4603, 10.5573, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Chad Kolton , emergency management spokesman with the Department of Homeland Security , said the government is open to new technologies and methods to communicate more quickly and efficiently .\nSentence 2: Chad Kolton , emergency management spokesman with the Department of Homeland Security , said the government is open to new ways to communicate .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: \" APEC leaders are painfully aware that security and prosperity are inseparable , \" Thai Prime Minister Thaksin Shinawatra told business leaders .\nSentence 2: \" APEC leaders are painfully aware that security and prosperity are inseparable , \" Thaksin said .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -2.1678, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.2162, -2.2630, -2.3094, -2.1256, -2.1723, -2.2186, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -2.0548, -2.0997, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.3354, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.3094,\n -2.3494, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -2.8887, -2.7495, -2.7852,\n -2.8208, -2.6830, -2.7187, -2.5820, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.7358, -2.7701, -2.8043, -2.8383, -2.8721, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "68.0%", + "z-score": "9.79", + "p value": "6.23e-23", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Danbury prosecutor Warren Murray could not be reached for comment Monday .\nSentence 2: Prosecutors could not be reached for comment after the legal papers were obtained late Monday afternoon .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Wittig resigned last year after being indicted on federal bank fraud charges involving a real estate loan unrelated to Westar business .\nSentence 2: Wittig resigned in late November about two weeks after being indicted on bank fraud charges in a real estate case unrelated to the company .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774, 0.4201, 0.2722,\n 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.3464, 0.6794, 1.0000,\n 1.3093, 1.1793, 1.4757, 1.3480, 1.2247, 1.1055, 1.3862, 1.6590, 1.5396,\n 1.4237, 1.3112, 1.5717, 1.4606, 1.3525, 1.2472, 1.1446, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.3333, 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547,\n 0.8660, 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582, 0.1925,\n 0.1275, 0.0634, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462, 0.1836, 0.3651,\n 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071, 0.6448, 0.5832, 0.7543,\n 0.9238, 1.0915, 1.0290, 1.1946, 1.1323, 1.0705, 1.0094, 1.1721, 1.3333,\n 1.2719, 1.2111, 1.1508, 1.3093, 1.2492, 1.1896, 1.1306, 1.0721, 1.2276,\n 1.1693, 1.1114, 1.0541, 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316,\n 1.0759, 1.2247, 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447,\n 0.7921, 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620, 0.7001,\n 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303, 0.6825, 0.6351,\n 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.8909, 0.8438, 0.7971, 0.7506,\n 0.7044, 0.6584, 0.7878, 0.7419, 0.6963, 0.6509, 0.6058, 0.5610, 0.5164,\n 0.4721, 0.4280, 0.5548, 0.5108, 0.4669, 0.4233, 0.3800, 0.3369, 0.2940,\n 0.2513, 0.2089, 0.1667, 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462,\n 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "64.1%", + "z-score": "12.6", + "p value": "9.28e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.3641, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.7635, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.3546,\n 11.2674, 11.1810, 11.2857, 11.2001, 11.3043, 11.2194, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.6102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Shattered Glass , \" starring Hayden Christensen as Stephen Glass , debuted well with $ 80,000 in eight theaters .\nSentence 2: \" Shattered Glass \" _ starring Hayden Christensen as Stephen Glass , The New Republic journalist fired for fabricating stories _ debuted well with $ 80,000 in eight theaters .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.3464, 0.6794, 0.5556,\n 0.8729, 1.1793, 1.4757, 1.7628, 2.0412, 1.9096, 2.1783, 2.0494, 1.9245,\n 1.8034, 2.0605, 1.9415, 1.8257, 1.7132, 1.9599, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.6667, 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428,\n 1.9462, 1.8516, 2.0647, 1.9711, 1.8791, 1.7889, 1.7002, 1.9064, 1.8185,\n 1.7321, 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.8245, 2.0135, 2.2000, 2.3842, 2.3028, 2.4841, 2.4034, 2.3238, 2.2453,\n 2.4228, 2.3448, 2.2678, 2.1918, 2.3658, 2.2902, 2.2156, 2.1420, 2.0692,\n 1.9973, 2.1669, 2.3349, 2.5011, 2.4286, 2.5927, 2.7552, 2.6828, 2.8433,\n 3.0022, 2.9299, 3.0870, 3.0151, 3.1704, 3.3243, 3.2525, 3.1814, 3.1111,\n 3.0415, 3.1928, 3.1236, 3.2733, 3.2044, 3.1363, 3.0688, 3.2163, 3.3627,\n 3.5079, 3.4402, 3.3731, 3.3066, 3.2408, 3.3838, 3.3182, 3.4599, 3.3947,\n 3.3301, 3.2660, 3.4058, 3.3420, 3.4806, 3.4171, 3.3542, 3.2918, 3.4286,\n 3.5645, 3.6995, 3.6369, 3.7707, 3.7084, 3.6466, 3.7791, 3.7176, 3.8490,\n 3.9795, 4.1092, 4.0476, 3.9865, 4.1150, 4.2426, 4.1816, 4.1210, 4.0608,\n 4.0011, 4.1273, 4.0678, 4.1931, 4.1338, 4.0750, 4.0166, 4.1406, 4.0825,\n 4.2056, 4.1477, 4.2699, 4.3915, 4.3336, 4.4544, 4.5744, 4.5166, 4.6359,\n 4.5783, 4.6968, 4.6395, 4.5826, 4.5260, 4.4698, 4.5871, 4.7037, 4.8197,\n 4.7635, 4.8787, 4.8227, 4.7670, 4.8815, 4.9953, 4.9397, 4.8845, 4.8295,\n 4.7749, 4.8877, 4.8333, 4.9455, 4.8913, 4.8374, 4.7838, 4.8950, 4.8416,\n 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: He was arrested Friday night at an Alpharetta seafood restaurant while dining with his wife , singer Whitney Houston .\nSentence 2: He was arrested again Friday night at an Alpharetta restaurant where he was having dinner with his wife .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.0250, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.7080,\n -2.5092, -2.5560, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.1500, -3.1889, -3.2276, -3.2660,\n -3.3041, -3.3420, -3.3797, -3.4171, -3.4543, -3.4913, -3.5280, -3.3665,\n -3.4035, -3.2437, -3.2810, -3.3181, -3.3549, -3.1977, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.2636, -3.2998, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.2509, -3.2863,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.8194, -3.8516, -3.8838, -3.9158,\n -3.7778, -3.8100, -3.8420, -3.8739, -3.9056, -3.9372, -3.9687, -4.0000,\n -3.8649, -3.8964, -3.9278, -3.9590, -3.9900, -4.0210, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.0488, 11.1614, 11.0615, 10.9626,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.3128, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.8120, 13.9042, 13.8193, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.0170, 13.9343, 14.0248, 14.1149, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.2200, 14.1393, 14.2282, 14.3166, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: However , EPA officials would not confirm the 20 percent figure .\nSentence 2: Only in the past few weeks have officials settled on the 20 percent figure .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "2.5%", + "z-score": "-7.33", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.8284,\n -2.8868, -2.9439, -3.0000, -3.0551, -3.1091, -3.1623, -3.2146, -3.2660,\n -3.3166, -3.3665, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -3.3362, -3.3853, -3.4338, -3.4816, -3.5287, -3.5753, -3.6213, -3.6667,\n -3.7115, -3.7559, -3.4763, -3.5228, -3.5687, -3.6141, -3.6589, -3.7033,\n -3.7471, -3.7905, -3.8334, -3.8759, -3.9179, -3.9595, -4.0007, -4.0415,\n -4.0819, -4.1219, -4.1615, -4.2008, -4.2398, -4.2784, -4.3167, -4.3546,\n -4.3923, -4.4296, -4.4667, -4.5034, -4.5399, -4.5760, -4.6119, -4.6476,\n -4.6830, -4.7181, -4.7529, -4.7875, -4.8219, -4.8561, -4.8900, -4.9237,\n -4.9571, -4.9904, -5.0234, -5.0562, -5.0888, -5.1212, -5.1534, -5.1855,\n -5.2173, -5.2489, -5.2804, -5.3116, -5.3427, -5.3736, -5.4044, -5.4349,\n -5.4653, -5.4956, -5.5256, -5.5556, -5.5853, -5.6149, -5.4252, -5.4554,\n -5.4856, -5.5155, -5.5453, -5.5750, -5.6045, -5.6338, -5.6630, -5.6921,\n -5.7210, -5.7498, -5.7784, -5.8069, -5.8353, -5.8635, -5.8916, -5.9196,\n -5.9474, -5.9752, -6.0028, -6.0302, -6.0576, -6.0848, -6.1119, -6.1389,\n -6.1658, -6.1926, -6.2192, -6.2458, -6.2722, -6.2985, -6.3247, -6.3509,\n -6.3769, -6.4028, -6.4286, -6.4543, -6.4799, -6.5054, -6.5308, -6.5561,\n -6.5813, -6.6064, -6.6315, -6.6564, -6.6812, -6.7060, -6.7307, -6.7552,\n -6.7797, -6.8041, -6.8285, -6.8527, -6.8768, -6.9009, -6.9249, -6.9488,\n -6.7950, -6.8192, -6.8434, -6.8675, -6.8915, -6.9155, -6.9393, -6.9631,\n -6.9868, -7.0104, -7.0340, -7.0574, -7.0808, -7.1041, -7.1274, -7.1506,\n -7.1737, -7.1967, -7.0507, -7.0741, -7.0973, -7.1205, -7.1436, -7.1667,\n -7.1896, -7.2125, -7.2354, -7.2581, -7.2808, -7.3034, -7.3260])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.5591, 6.4312, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.9601, 9.8634, 9.9813, 9.8858, 10.0029, 9.9085,\n 10.0249, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.1893, 12.1036, 12.0185, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.5131, 13.4371, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: University of Michigan President Mary Sue Coleman said in a statement on the university 's Web site , \" Our fundamental values haven 't changed .\nSentence 2: \" Our fundamental values haven 't changed , \" Mary Sue Coleman , president of the university , said in a statement in Ann Arbor .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "22.9%", + "z-score": "-0.444", + "p value": "0.671", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.7279, 12.8267, 12.7376,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 12.8661, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.2593, 13.3537, 13.2690, 13.3631,\n 13.2791, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.9427, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The first products are likely to be dongles costing between US $ 100 and US $ 150 that will establish connections between consumer electronics devices and PCs .\nSentence 2: The first products will likely be dongles costing $ 100 to $ 150 that will establish connections between consumer electronics devices and PCs .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425,\n 6.6172, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: However , Lapidus expects foreign brands ' sales to be up 4 percent , driven by strong truck sales at Honda Motor Co .\nSentence 2: Lapidus expects Ford to be down 5 percent , Chrysler down 10 percent and foreign brands up 4 percent driven by strong truck sales at Honda .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "16.8%", + "z-score": "-1.9", + "p value": "0.971", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.0785, -1.8958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868, 2.6605, 2.9938,\n 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998, 3.1177, 3.3968, 3.2222,\n 3.0551, 3.3235, 3.1623, 3.0072, 3.2660, 3.5176, 3.3665, 3.2205, 3.4641,\n 3.7017, 3.9337, 4.1603, 4.0166, 3.8772, 4.0980, 3.9620, 4.1779, 4.0451,\n 4.2563, 4.1265, 4.3333, 4.2064, 4.4091, 4.2848, 4.4836, 4.3618, 4.2426,\n 4.4374, 4.3205, 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281,\n 4.9075, 4.7980, 4.9747, 4.8669, 5.0410, 5.2129, 5.1065, 5.0019, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.2981, 5.1978, 5.3605, 5.2615, 5.4222, 5.3245,\n 5.4832, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.6622, 5.5691, 5.4772,\n 5.6286, 5.5377, 5.4480, 5.5976, 5.7458, 5.6569, 5.5690, 5.7155, 5.8606,\n 6.0044, 6.1470, 6.0596, 5.9732, 6.1143, 6.0288, 6.1685, 6.0838, 6.2222,\n 6.1383, 6.2755, 6.1924, 6.3283, 6.2459, 6.3807, 6.2991, 6.2183, 6.3517,\n 6.2716, 6.1923, 6.3246, 6.4558, 6.5861, 6.7155, 6.6365, 6.7648, 6.8922,\n 6.8138, 6.9402, 6.8624, 6.9879, 6.9107, 6.8343, 6.7585, 6.6833, 6.8076,\n 6.9310, 7.0537, 6.9789, 6.9048, 7.0265, 6.9529, 7.0737, 7.0007, 7.1207,\n 7.0481, 7.1673, 7.0952, 7.2136, 7.1421, 7.2596, 7.1886, 7.1181, 7.2348,\n 7.1647, 7.0952, 7.2111, 7.3263, 7.2572, 7.1885, 7.3030, 7.4168, 7.5299,\n 7.6424, 7.7544, 7.8657, 7.7971, 7.9078, 7.8397, 7.7720, 7.8820, 7.9913,\n 7.9241, 7.8572, 7.9659, 7.8995, 8.0076, 7.9415, 7.8759, 7.8107, 7.7460,\n 7.6816, 7.6177, 7.7249, 7.8316, 7.9377, 7.8740, 7.8107, 7.9162, 7.8533,\n 7.9582, 7.8956, 8.0000, 7.9377, 8.0416, 7.9796, 8.0829, 8.0212, 8.1240,\n 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Michel , who remains in the government , denied that US pressure had provoked the government 's move .\nSentence 2: Michel , who has stayed in the new government , denied that it was U.S. pressure which had provoked the government 's move .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.4888, -0.3244, -0.3769, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.2158, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "133", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "69.2%", + "z-score": "11.8", + "p value": "2.97e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 8.7788,\n 8.9169, 8.7986, 8.6820, 8.5672, 8.7045, 8.5915, 8.7277, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.7647])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Some of the computers also are used to send spam e-mail messages to drum up traffic to the sites .\nSentence 2: Some are also used to send spam e-mail messages to boost traffic to the sites .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "29.0%", + "z-score": "0.949", + "p value": "0.171", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.8337, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 6.0212, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 8.9763, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.2240, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.7714, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.9480, 11.0450, 10.9740, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Indonesia 's army has often been accused of human rights abuses during GAM 's battle for independence , charges it has generally denied while accusing the separatists of committing rights violations .\nSentence 2: Indonesia 's army has been accused of human rights abuses during its earlier battles with GAM , charges it has generally denied .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.4264, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.9396, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.8793, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.5203, 0.4714, 0.6108, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.7688, 0.9017, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.2326, 1.3607, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 0.9558, 1.0777, 1.1990, 1.3197, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.4146, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 8.0829, 7.9853, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.2554, 9.3686, 9.4812, 9.3993, 9.3181, 9.2376,\n 9.1577, 9.0786, 9.0000, 8.9221, 8.8448, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.6535, 8.7647, 8.6903, 8.6165, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.9086, 8.8364, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.1932, 9.1218, 9.2276, 9.3328, 9.4375, 9.3665, 9.2961, 9.2261,\n 9.3302, 9.4338, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.6356, 9.5673, 9.6684, 9.7690, 9.8691, 9.8012, 9.9008, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.2273, 10.1602, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Bush also hoped to polish his anti-AIDS credentials in Uganda , which has been hailed as an African pioneer in fighting the killer disease .\nSentence 2: President Bush flies to Uganda Friday hoping to polish his anti- AIDS credentials in a country hailed as an African pioneer in fighting the epidemic .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.3570, 2.1939, 2.0381, 2.3333,\n 2.1822, 2.0370, 1.8974, 2.1776, 2.0412, 1.9096, 1.7823, 1.6590, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.0954, 0.9918, 1.2472, 1.1446, 1.3926, 1.6353,\n 1.8728, 2.1054, 2.0000, 1.8970, 1.7963, 2.0211, 1.9215, 1.8240, 2.0428,\n 1.9462, 1.8516, 2.0647, 1.9711, 2.1798, 2.3851, 2.2916, 2.1997, 2.1094,\n 2.3094, 2.2200, 2.1320, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.9052,\n 2.0948, 2.0135, 1.9333, 2.1193, 2.0397, 2.2226, 2.1436, 2.3238, 2.5019,\n 2.4228, 2.3448, 2.2678, 2.4423, 2.3658, 2.5378, 2.4618, 2.6316, 2.5560,\n 2.4814, 2.4077, 2.5743, 2.5011, 2.6656, 2.5927, 2.7552, 2.6828, 2.6112,\n 2.5403, 2.7001, 2.6296, 2.5600, 2.4910, 2.6481, 2.5796, 2.5117, 2.4444,\n 2.3779, 2.3120, 2.2468, 2.1822, 2.1182, 2.0548, 1.9920, 2.1442, 2.0817,\n 2.2323, 2.3817, 2.5298, 2.6768, 2.6135, 2.5508, 2.4887, 2.6336, 2.5717,\n 2.5103, 2.6536, 2.5925, 2.5318, 2.6735, 2.6131, 2.7534, 2.8928, 2.8324,\n 2.7724, 2.7129, 2.8505, 2.7913, 2.7325, 2.6742, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.5802, 2.7143, 2.6576, 2.6014, 2.7341, 2.6781, 2.8098, 2.7539,\n 2.8845, 2.8288, 2.7735, 2.7186, 2.6640, 2.7930, 2.7386, 2.8666, 2.8124,\n 2.9394, 2.8853, 2.8316, 2.7783, 2.9040, 2.8508, 2.9756, 2.9225, 3.0464,\n 2.9935, 2.9410, 2.8887, 3.0114, 2.9593, 2.9076, 2.8561, 2.9776, 2.9263,\n 2.8752, 2.8245, 2.7741, 2.7240, 2.6742, 2.6247, 2.5754, 2.5265, 2.4778,\n 2.5969, 2.5483, 2.6667, 2.7844, 2.9016, 3.0182, 2.9692, 2.9205, 2.8721,\n 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Later this year , the command will send trainers with soldiers from four North African nations on patrolling and intelligence gathering missions .\nSentence 2: This fall the command will send trainers to work with soldiers from four North African nations on patrolling and gathering intelligence .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "28.7%", + "z-score": "1.1", + "p value": "0.137", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.0137, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.7707, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.5726, -0.4148, -0.2582, -0.3086, -0.1537, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.7095, 0.8485, 0.9867, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 1.1447, 1.0954])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: The reports helped overcome investor jitters after the euro briefly hit an all-time high against the dollar Tuesday .\nSentence 2: Stocks slipped at the open after the euro hit record highs against the dollar .\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 1.0284, 1.1711, 1.1183, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 1.1305, 1.2623, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.3607, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.3159, 1.4393, 1.3933, 1.5159, 1.6378, 1.5916, 1.5457, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.7065, 9.5876, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.6470, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.5623, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.9355, 10.8353, 10.7362, 10.8498, 10.7517,\n 10.6547, 10.7678, 10.6719, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.0476, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.1893, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.2627, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 12.9891, 13.0821, 13.1746, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Sales for the quarter beat expectations , rising 37 percent year-on-year to 1.76 billion euros .\nSentence 2: Sales rose 37 per cent year-on-year to 1.76bn , beating expectations .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.2010, -0.2503, -0.0998, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.4878, 7.6613, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.6140, 7.4790, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.0419, 8.9469,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.8702, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 10.8686,\n 10.9697, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: If the MTA 's appeal to a higher court is successful , the $ 2 bus and subway base fare won 't be rolled back .\nSentence 2: If the MTA 's appeal is successful , the $ 2 bus and subway base fare won 't change .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.5070, -1.5556, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.1381,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.9753, -1.8383, -1.8767, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -1.8728,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -1.8701, -1.9068, -1.9432, -1.9795, -1.8511, -1.8874, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two sentences paraphrases of each other? Answer 'yes' or 'no':\nSentence 1: Robert Walsh , 40 , remained in critical but stable condition Friday at Staten Island University Hospital 's north campus .\nSentence 2: Walsh , also 40 , was in critical but stable condition at Staten Island University Hospital last night .\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -1.0445,\n -1.0849, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "93", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "57.0%", + "z-score": "7.12", + "p value": "5.23e-13", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140, 4.5033, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.6921, 5.4958, 5.3072, 5.1257, 5.3468, 5.5626, 5.7735,\n 5.9797, 5.8068, 5.6395, 5.4772, 5.6805, 5.8797, 5.7229, 5.5705, 5.7664,\n 5.6183, 5.8108, 5.6667, 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140,\n 5.9944, 6.1721, 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648,\n 6.9282, 7.0895, 6.9646, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132, 6.8641,\n 6.7583, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.6469, 6.7931, 6.9378,\n 7.0812, 7.2232, 7.1243])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.59, + "accuracy_with_watermark": 0.57, + "f1_without_watermark": 0.7210884353741497, + "f1_with_watermark": 0.6993006993006993 + } + } + }, + "qqp": { + "train": { + "results": [ + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How is the life of a math student? Could you describe your own experiences?\nQuestion 2: Which level of prepration is enough for the exam jlpt5?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -1.8385, -1.8843, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.7217, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -1.8571, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.8033, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.4351, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.2026, 3.0929, 2.9856, 2.8804, 3.0861,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.1787, 3.3729, 3.2733, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 3.1743, 3.0833, 3.2660,\n 3.4463, 3.6242, 3.8000, 3.9736, 4.1451, 4.3146, 4.2222, 4.3894,\n 4.5547, 4.7181, 4.8797, 5.0395, 5.1977, 5.1051, 5.0138, 4.9237,\n 4.8347, 4.9904, 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.9497,\n 5.1000, 5.0156, 5.1643, 5.3116, 5.4576, 5.6023, 5.5181, 5.6614,\n 5.8034, 5.9442, 6.0838, 6.0000, 6.1383, 6.2755, 6.1924, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.5653, 6.6968, 6.6157, 6.5354,\n 6.4558, 6.5861, 6.5072, 6.4291, 6.5583, 6.6865, 6.6089, 6.5320,\n 6.6591, 6.5828, 6.7090, 6.6332, 6.7585, 6.8828, 7.0063, 6.9310,\n 7.0537, 6.9789, 7.1007, 7.2217, 7.3419, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.9729, 7.8988, 8.0139, 8.1282, 8.2420,\n 8.1683, 8.2813, 8.3937, 8.5054, 8.6165, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.3328, 9.4375, 9.3665, 9.2961, 9.4002,\n 9.3302, 9.2607, 9.3642, 9.4673, 9.3982, 9.3295, 9.2613, 9.3638,\n 9.2960, 9.3980, 9.4995, 9.6005, 9.7011, 9.8012, 9.9008, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I control my horny emotions?\nQuestion 2: How do you control your horniness?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "22.9%", + "z-score": "-0.444", + "p value": "0.671", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 6.7489, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.8025, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.1810, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.7326, 11.6487, 11.5655, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.0990, 12.0180, 11.9377,\n 12.0355, 12.1329, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.4547, 12.3764, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.5986,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.7248, 12.6494, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What causes stool color to change to yellow?\nQuestion 2: What can cause stool to come out as little balls?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -1.7408, -1.8074, -1.8728, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.0997, -2.1442, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.7857, -1.8257,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.6827, -1.7219, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 8.9169, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.8995,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.3128, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.3361, 13.2499, 13.3447, 13.4390, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.3728, 13.4661, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.2200, 14.3087, 14.2282, 14.3166, 14.4046, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.3422, 14.2640, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What can one do after MBBS?\nQuestion 2: What do i do after my MBBS ?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.0014, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.3425, 8.2325, 8.1240,\n 8.2619, 8.1550, 8.0495, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.8025, 10.7141, 10.6265, 10.5397, 10.6481, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.6537, 10.5714, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.1018, 11.0235, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 10.9176, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 12.0529, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Where can I find a power outlet for my laptop at Melbourne Airport?\nQuestion 2: Would a second airport in Sydney, Australia be needed if a high-speed rail link was created between Melbourne and Sydney?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.2966, -2.3351, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.6178, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.7013, -2.7358, -2.7701, -2.8043, -2.8383, -2.8721, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.1111, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.2325,\n 12.1489, 12.2474, 12.1646, 12.0824, 12.0008, 11.9197, 11.8393, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 11.9927, 11.9147, 12.0114,\n 11.9340, 11.8571, 11.9534, 12.0493, 12.1447, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.1073, 12.0327, 12.1270, 12.0529, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.0209, 11.9487, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How not to feel guilty since I am Muslim and I'm conscious we won't have sex together?\nQuestion 2: I don't beleive I am bulimic, but I force throw up atleast once a day after I eat something and feel guilty. Should I tell somebody, and if so who?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "59", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "22.0%", + "z-score": "-0.526", + "p value": "0.701", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.6%", + "z-score": "12.9", + "p value": "2.78e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.4444, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.0370, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.2509, 10.1692, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.7175, 10.8200, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.9340, 12.0302, 11.9534, 12.0493, 12.1447, 12.2397, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How is air traffic controlled?\nQuestion 2: How do you become an air traffic controller?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.1320, -2.1866, -2.2404, -2.2937, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.3554, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.4908, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.4495, -2.3063, -2.3443, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.1779, 4.0451, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.4474, 9.5620, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.6814, 9.7908, 9.7109, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the best self help book you have read? Why? How did it change your life?\nQuestion 2: What are the top self help books I should read?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.5348, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.5547, 0.5069, 0.4593, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.7461, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.9301, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.1646, 12.2627, 12.3603, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.6439, 12.7378, 12.8313, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 12.9691, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can I enter University of Melbourne if I couldn't achieve the guaranteed marks in Trinity College Foundation?\nQuestion 2: University of the Philippines: If I take a second BFA in the UP College of Fine Arts, can I be exempted from gen. ed. or core subjects?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.5842,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.1640, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.1884, -2.0512, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.0799, -2.1167, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.6785, 7.8065, 7.7192, 7.8463, 7.7598, 7.6742, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.2365, 8.3525, 8.2754,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.5577, 9.6635, 9.7688, 9.8736, 9.9778,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Do you need a passport to go to Jamaica from the United States?\nQuestion 2: How can I move to Jamaica?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -2.9755, -3.0290, -3.0816, -3.1334, -3.1845, -2.8943, -2.9475, -3.0000,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.6171, -2.3570, -2.4133, -2.4689,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -3.1730, -3.2157, -3.2579, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.5443, -3.5839, -3.6233,\n -3.6623, -3.7011, -3.7396, -3.7778, -3.8157, -3.8534, -3.8908, -3.9279,\n -3.9648, -4.0015, -4.0379, -4.0740, -3.8965, -3.9331, -3.9694, -4.0056,\n -4.0415, -3.8680, -3.9043, -3.9404, -3.7697, -3.8061, -3.8424, -3.8784,\n -3.7108, -3.5446, -3.5815, -3.6181, -3.4543, -3.4913, -3.5280, -3.5645,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.6068, -3.6420, -3.6770, -3.7117, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -4.1303, -4.1624, -4.1944, -4.2262, -4.0822, -4.1143, -4.1461, -4.0038,\n -4.0359, -4.0678, -4.0996, -3.9590, -3.8194, -3.8516, -3.8838, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.7055, -3.7376, -3.7697, -3.8016, -3.8333,\n -3.8649, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "45.0%", + "z-score": "4.81", + "p value": "7.5e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.0290, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.5942, 3.7905, 3.6831, 3.5777, 3.4743, 3.3729, 3.5642, 3.7528,\n 3.9386, 4.1219, 4.0205, 4.2008, 4.1008, 4.0024, 3.9056, 4.0825,\n 4.2571, 4.1612, 4.3333, 4.5034, 4.4083, 4.3146, 4.2222, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.0316, 3.9452, 4.1090, 4.0234, 3.9389,\n 4.1003, 4.0166, 4.1761, 4.3339, 4.2507, 4.1684, 4.0872, 4.0069,\n 4.1621, 4.3158, 4.4680, 4.6188, 4.5384, 4.6876, 4.6079, 4.5291,\n 4.4511, 4.5983, 4.7442, 4.6667, 4.8111])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the district of Edgware and how does the lifestyle compare to the London Borough of Islington?\nQuestion 2: What is the county of Edgware and how does the lifestyle compare to the London Borough of Enfield?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "28.8%", + "z-score": "0.743", + "p value": "0.229", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.7433])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.9%", + "z-score": "10.7", + "p value": "8.37e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.2339, 4.0814, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.1779, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.3618, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.8849, 7.0268, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.0134, 9.1302, 9.0453, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.9067, 10.0131, 9.9357, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.2591, 10.1846, 10.1106, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.3999, 10.3280, 10.4281, 10.3566, 10.2856, 10.2151,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.3038, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.6256, 10.5573, 10.6538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What will be Hillary Clinton's policy towards India if she becomes president?\nQuestion 2: What will be Hilary Clinton's policy towards India if she become President?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.5181, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.2478,\n -2.2871, -2.3262, -2.1796, -2.2188, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.3912, -2.4283, -2.2892, -2.3264, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.4553, -2.4910, -2.5265, -2.3938, -2.4294, -2.2977, -2.3333,\n -2.3688, -2.2384, -2.1086, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 12.9011, 13.0000, 13.0984, 13.0071, 13.1050, 13.2025, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.6990, 13.7926, 13.7054, 13.6188, 13.5329, 13.6264, 13.7194,\n 13.6343, 13.7270, 13.8193, 13.9111, 13.8270, 13.9185, 13.8350, 13.9262,\n 13.8434, 13.7612, 13.8522, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.0502, 13.9700, 14.0593, 13.9797, 13.9007, 13.9897, 13.9113, 14.0000,\n 13.9221, 14.0106, 13.9332, 14.0214, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the responsibility of SAP ERP key user?\nQuestion 2: What is a qualified SAP ERP key user?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321, 2.1004, 1.9052,\n 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.6186, 2.4659, 2.3190, 2.1776, 2.4495, 2.3116, 2.5744, 2.4398, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.1909, 2.0738, 1.9599, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547,\n 0.8660, 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582, 0.1925,\n 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924, 0.4284, 0.6086,\n 0.7868, 0.9631, 0.8980, 1.0719, 1.0070, 0.9428, 1.1138, 1.0498, 0.9864,\n 0.9238, 0.8617, 0.8003, 0.7395, 0.6794, 0.8452, 0.7851, 0.7256, 0.8889,\n 0.8295, 0.9909, 1.1508, 1.0911, 1.0319, 0.9733, 0.9152, 0.8577, 0.8006,\n 0.7441, 0.6880, 0.6325, 0.5774, 0.7318, 0.6768, 0.8296, 0.7746, 0.7201,\n 0.6660, 0.6124, 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460,\n 0.5941, 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620, 0.5134,\n 0.4652, 0.4174, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651, 0.5005, 0.4536,\n 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563, 0.3109, 0.4428, 0.3974,\n 0.3522, 0.3073, 0.4377, 0.3928, 0.3482, 0.4774, 0.4327, 0.3884, 0.3443,\n 0.3004, 0.4280, 0.5548, 0.6810, 0.8065, 0.7620, 0.7177, 0.6737, 0.6299,\n 0.5864, 0.7102, 0.6667, 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103,\n 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.2476, 10.3709,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.3586, 12.4625, 12.3655, 12.4689, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.9935, 12.9011, 12.8095, 12.9085, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.2686, 14.3587, 14.4484,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 14.8991, 14.9860, 15.0726, 15.1587, 15.2446, 15.3301, 15.2481, 15.3333,\n 15.4182, 15.5028, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which is the best book to study TENSOR for general relativity from basic?\nQuestion 2: Which is the best book for tensor calculus?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.2624, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.0814, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 7.8318, 7.9630, 7.8699, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 7.8065, 7.7192, 7.6328, 7.7598, 7.6742, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.2413, 8.3605, 8.4788, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.2760, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.1229, 8.2381, 8.1628, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.3550, 8.2813, 8.2082, 8.1356, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.3268, 8.4371, 8.3660, 8.2954, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.7676, 8.6978, 8.6284, 8.5595, 8.6662, 8.7724, 8.8780,\n 8.8094, 8.7414, 8.8464, 8.7788, 8.7116, 8.6448, 8.7492, 8.8531,\n 8.9565, 9.0593, 8.9929, 8.9268, 9.0292, 9.1310, 9.2324, 9.3333,\n 9.4338, 9.5338, 9.6334, 9.7325, 9.6666, 9.6011, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How is being gay or lesbian less moral than divorce?\nQuestion 2: \"Why do a lot of theists and agnostics confuse mainstream atheistic thought with \"\"positive atheism\"\"?\"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "67.2%", + "z-score": "13.7", + "p value": "4.79e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.4311, 11.3143, 11.4311, 11.5470, 11.4323, 11.5476, 11.4345, 11.5492,\n 11.6631, 11.7762, 11.8885, 12.0000, 11.8895, 12.0005, 12.1107, 12.0020,\n 12.1117, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.5460, 12.4434, 12.3419, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.6684, 12.5717, 12.6739,\n 12.7755, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.2025, 13.1122,\n 13.0226, 12.9337, 12.8456, 12.9430, 12.8556, 12.7690, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.8037, 12.7199, 12.6367, 12.5542, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.8313, 12.9244, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.7042])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do you thank a Disneyland cast member?\nQuestion 2: How can I go to Disneyland with little money?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.7150, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.7778, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.9920, 1.9298, 1.8682, 1.8071, 1.7465, 1.8974,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.9545, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 2.0101, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 2.0642, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 2.1685, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.0868, 2.2188, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.2680, 2.3967, 2.3443, 2.2923, 2.2406, 2.1892, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.3635, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.4553, 2.5754, 2.5265, 2.4778, 2.4294, 2.3812, 2.5000,\n 2.6182, 2.5700, 2.5220, 2.4744, 2.4269, 2.5439, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 10.1352,\n 10.0242, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.2259, 11.1197, 11.2339, 11.3473,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.3524, 12.4567, 12.5604, 12.4625, 12.5657, 12.6684, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.4745, 13.5714, 13.4780, 13.5746, 13.6707, 13.5784, 13.6742,\n 13.7694, 13.6781, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.5199, 14.6103,\n 14.7002, 14.6126, 14.7023, 14.7916, 14.7049, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.4940, 15.5792, 15.6640, 15.5805, 15.6651, 15.7494, 15.6667,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the coolest Android hacks and tricks you know?\nQuestion 2: What are some cool hacks for Android phones?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "108", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "27.8%", + "z-score": "0.667", + "p value": "0.252", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.6192, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 10.0385, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.5623, 10.4592, 10.5763, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.0746, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.6584, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.1125, 12.2150, 12.1244,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.5732, 12.6713, 12.7690, 12.8661, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.1644, 13.2593, 13.1745, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.6514, 13.7434, 13.6604, 13.7521,\n 13.6698, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.1393, 14.2282, 14.1482, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.6027, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: If you received a check from Donald Knuth, what did you do and why did you get it?\nQuestion 2: How can I contact Donald Knuth?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.1140, -0.9584, -1.0050, -1.0513, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -0.9439, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.5220, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "165", + "Fraction of T in Greenlist": "82.9%", + "z-score": "18.9", + "p value": "1.06e-79", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.4922,\n 12.6035, 12.4807, 12.5916, 12.7017, 12.8110, 12.9196, 13.0274, 13.1344,\n 13.0154, 13.1221, 13.2280, 13.3333, 13.4379, 13.5419, 13.6451, 13.5295,\n 13.6324, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.0253, 14.1248,\n 14.2238, 14.3222, 14.4200, 14.5173, 14.6141, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 14.9687, 15.0624, 15.1556, 15.2483,\n 15.3405, 15.4323, 15.5236, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.9591, 15.8571, 15.9459, 16.0342, 16.1220, 16.2095, 16.2966,\n 16.3833, 16.2835, 16.3700, 16.4561, 16.5418, 16.6272, 16.7122, 16.7968,\n 16.6991, 16.7835, 16.8676, 16.9514, 17.0348, 17.1178, 17.2005, 17.1047,\n 17.1873, 17.2695, 17.3514, 17.4329, 17.5142, 17.5951, 17.5011, 17.5818,\n 17.6623, 17.7424, 17.8223, 17.9018, 17.9810, 17.8888, 17.9678, 18.0466,\n 18.1251, 18.2034, 18.2813, 18.3589, 18.2683, 18.3458, 18.4230, 18.5000,\n 18.5767, 18.6531, 18.7292, 18.6402, 18.7162, 18.7920, 18.8675])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which are the best motivational videos?\nQuestion 2: What are some of the best motivational clips?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.4335, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.6466, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.2435, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 8.9489,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.3288, 10.1982, 10.0698,\n 10.1999, 10.0737, 10.2030, 10.0791, 10.2075, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.5556, 11.6683, 11.5601, 11.6723, 11.7838,\n 11.8944, 11.7881, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.5460, 12.6508, 12.7550, 12.8586, 12.9616, 13.0639,\n 13.1657, 13.0643, 13.1657, 13.0655, 13.1665, 13.0674, 13.1680, 13.0699,\n 13.1701, 13.0732, 13.1730, 13.2722, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.3770, 13.4745, 13.5714, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.3301, 14.2388, 14.3313, 14.2408,\n 14.3330, 14.4248, 14.5161, 14.4267, 14.5178, 14.4292, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.8789, 14.9677, 15.0560, 15.1440, 15.2316, 15.1448,\n 15.0585, 15.1460, 15.2331, 15.1477, 15.2345, 15.1498, 15.0657, 15.1524,\n 15.0689, 15.1553, 15.2414, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I lose weight fast?\nQuestion 2: What is the best way to reduce weight fast?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, 0.1974, 0.1307, 0.3248, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.6747, 8.5749, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.9783, 9.8877, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.7622, 9.6758, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.6814, 9.6016, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.9431, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: If a die is rolled, what is the probability that the number is greater than 4?\nQuestion 2: If a die is rolled. what is the probability that the number on top is a 3?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.4162, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 8.7758, 8.9086, 8.8007, 8.6942, 8.8260, 8.9567,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.0249, 9.9315, 9.8389, 9.9547, 10.0698, 9.9783, 9.8877, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.3616, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.0180, 11.9377,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.6918, 12.7847, 12.8771, 12.8007, 12.7248, 12.8169, 12.7416, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.7017, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the best resources for learning Morse code?\nQuestion 2: What is Morse code?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -1.8974, -1.9702, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.1547,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.2730, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 2.5820, 2.3938, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.5391, 11.6465, 11.7533, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.7279, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.0936, 14.1842, 14.2744,\n 14.1906, 14.2805, 14.3700, 14.4591, 14.5479, 14.6362, 14.5535, 14.6416,\n 14.7293, 14.6473, 14.7348, 14.8219, 14.9086, 14.9950, 15.0810, 15.0000,\n 15.0858, 15.1712, 15.0909, 15.1761, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Whether alloy are only isotropic and homogeneous like metals, or alloys also exhibit orthotropic/anisotropic and heterogeneous like CompositeMaterials?\nQuestion 2: What is the best backend for my app?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 5.3333, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.4175, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.5672, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.1101, 9.0067, 9.1343, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.4524, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.7349, 10.8426, 10.9497, 10.8631, 10.9697,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.0114,\n 12.1076, 12.2033, 12.1260, 12.2214, 12.3163, 12.4109, 12.3342, 12.4283,\n 12.5221, 12.4460, 12.5394, 12.6323, 12.7248, 12.8169, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can I make me believe that everything is going good in life and get satisfaction when nothing is going right?\nQuestion 2: What type of government does France currently have and how has it benefited the country?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.0676, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.3558, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 1.0954,\n 1.0465, 1.1794, 1.3114, 1.2623, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.3771, 1.3303, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.5159, 1.4699, 1.4241, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 10.8498, 10.9626,\n 10.8647, 10.7678, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 12.8661, 12.9628,\n 12.8769, 12.7918, 12.7073, 12.8037, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 14.0502, 13.9700, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How does an IQ test work and what is determined from an IQ test?\nQuestion 2: How does IQ test works?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.7325, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -2.8845, -2.9216, -2.7735, -2.8107, -2.8478, -2.7014, -2.7386,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.8174, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -2.9263, -2.9611, -2.9957, -2.8595, -2.8943,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.7297, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.7701, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 5.8241, 5.7155,\n 5.8789, 6.0404, 5.9333, 6.0928, 6.2505, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 7.8065, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.3813, 8.2956, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.2858, 8.2032, 8.3231, 8.2413, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.6921, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.8008, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.5485, 9.4761, 9.5808, 9.6850, 9.6130, 9.7167, 9.6452, 9.7483,\n 9.8510, 9.9531, 9.8821, 9.9837, 10.0848, 10.0143, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.2050, 10.3038, 10.2350, 10.3333,\n 10.2650, 10.3628, 10.4603, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is it safe to use Xiaomi mobile phones?\nQuestion 2: Is it safe or unsafe to use Xiaomi Products?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.8165,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "165", + "Fraction of T in Greenlist": "82.9%", + "z-score": "18.9", + "p value": "1.06e-79", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.3485, 7.5186, 7.3659, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.9546, 11.0755, 11.1954, 11.0761, 11.1954, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.3161, 11.4323, 11.5476, 11.6620, 11.5492,\n 11.6631, 11.7762, 11.8885, 12.0000, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.2768, 13.3789, 13.4804, 13.5813, 13.6816, 13.7813, 13.6763,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.4562,\n 14.5513, 14.4493, 14.5442, 14.6385, 14.7324, 14.8257, 14.9187, 15.0111,\n 15.1031, 15.1946, 15.2857, 15.3764, 15.4666, 15.5563, 15.6457, 15.7346,\n 15.8232, 15.9113, 15.9990, 16.0863, 15.9889, 16.0760, 16.1628, 16.2491,\n 16.3351, 16.4207, 16.5059, 16.5907, 16.6752, 16.7593, 16.6644, 16.7484,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.6497, 17.7297, 17.8094, 17.8888, 17.9678, 18.0466,\n 18.1251, 18.0340, 18.1124, 18.1905, 18.2683, 18.3458, 18.4230, 18.5000,\n 18.5767, 18.6531, 18.7292, 18.6402, 18.7162, 18.7920, 18.8675])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Fetch jobs from job portals through API calls?\nQuestion 2: What are some creative ideas for arranging a freshers' party?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.3641, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.5097, 1.4517, 1.6008, 1.5430, 1.4857, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.6081, 1.5519, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.4570, 1.5848, 1.5363, 1.4881, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.7310, 1.6843, 1.6378, 1.7592, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.8605, 1.8145, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.7327, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.1151,\n 10.0353, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.4244, 11.5234, 11.6220, 11.7200, 11.8176, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the best books on cosmology?\nQuestion 2: Which is the best book for cosmology?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.4%", + "z-score": "11.1", + "p value": "3.68e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.0358, 7.8923, 8.0498, 8.2054, 8.0656, 7.9286, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.0413, 7.9196, 8.0667, 7.9472, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.6665, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.8186, 10.9178, 10.8444, 10.9431, 10.8702, 10.7978, 10.8961,\n 10.9939, 10.9220, 10.8505, 10.9480, 10.8770, 10.9740, 10.9034, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.0521, 11.1475])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why did it take so long for NASA to find water on Mars?\nQuestion 2: How much chances are there that NASA already knew that there is water on Mars?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.9%", + "z-score": "1.6", + "p value": "0.0543", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.4763, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.6660, 0.8165,\n 0.9658, 0.9115, 1.0593, 1.0050, 1.1514, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.3448, 1.2943, 1.2441, 1.3779, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 1.0445,\n 1.1717, 1.2982, 1.2514, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.4393, 1.5621, 1.5159, 1.6378, 1.5916, 1.7128, 1.6667,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "57.9%", + "z-score": "9.57", + "p value": "5.37e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426, 4.5033, 4.3027, 4.5556,\n 4.3644, 4.1812, 4.4272, 4.6663, 4.8990, 4.7237, 4.9507, 4.7819, 4.6188,\n 4.4610, 4.6829, 4.8999, 5.1121, 5.3199, 5.1671, 5.0186, 5.2223, 5.0779,\n 5.2778, 5.1371, 5.0000, 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140,\n 5.6830, 5.5549, 5.7354, 5.6099, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919,\n 6.0622, 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132, 6.6075,\n 6.5033, 6.6541, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469, 6.7931, 6.9378,\n 7.0812, 6.9824, 6.8849, 6.7886, 6.9305, 7.0711, 7.2104, 7.1152, 7.0211,\n 6.9282, 7.0662, 7.2029, 7.3386, 7.4730, 7.3810, 7.5143, 7.6466, 7.7778,\n 7.9079, 7.8168, 7.7268, 7.6376, 7.7667, 7.8948, 8.0219, 7.9336, 8.0598,\n 7.9724, 7.8859, 7.8003, 7.9254, 8.0497, 8.1731, 8.2956, 8.2107, 8.1266,\n 8.2483, 8.1650, 8.2858, 8.2032, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964,\n 8.5153, 8.4348, 8.5516, 8.6677, 8.7831, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659, 9.4752,\n 9.5840, 9.5066, 9.4299, 9.3537, 9.4619, 9.5695])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: My Galaxy ace is hang?\nQuestion 2: Why are the people on Staten Island are racist?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.5784, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.4346, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.2467, 12.3428, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.6918, 12.6153, 12.7082, 12.8007, 12.7248, 12.8169, 12.7416, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Where can I learn to invest in stocks?\nQuestion 2: How can I learn more about stocks?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, 0.1879, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.5206, 8.3423, 8.5057, 8.6667,\n 8.4953, 8.6549, 8.8121, 8.6469, 8.8029, 8.6424, 8.4856, 8.3324,\n 8.1825, 8.3391, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.2717, 9.1455, 9.2828, 9.1589, 9.0370,\n 8.9169, 8.7986, 8.6820, 8.5672, 8.7045, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.1480, 11.0554, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.7498, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.8165, 12.7378, 12.8313, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are major and defining characteristics of Americans?\nQuestion 2: What are some good characteristics of the American culture?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -1.9906, -2.0282, -2.0656, -1.9311, -1.9686, -2.0059, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.5916, -1.4621, -1.5000,\n -1.3714, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.9424, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.5466, 3.4293, 3.6380, 3.8431, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.4721, 4.6571, 4.5461, 4.4371, 4.3301,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.4610, 5.3605, 5.5213, 5.4222,\n 5.5811, 5.4832, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.8139, 5.9641, 5.8707, 5.7785, 5.9270, 5.8358, 5.9827, 5.8926,\n 6.0380, 5.9488, 5.8606, 6.0044, 6.1470, 6.0596, 6.2008, 6.3408,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.1383, 6.0553, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.2183, 6.1382, 6.0590, 5.9806, 6.1137,\n 6.2459, 6.1680, 6.0908, 6.2217, 6.1451, 6.2750, 6.4039, 6.5320,\n 6.6591, 6.7854, 6.9107, 7.0353, 7.1590, 7.0823, 7.2051, 7.3271,\n 7.2510, 7.1755, 7.1007, 7.2217, 7.3419, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673,\n 7.7949, 7.7230, 7.8372, 7.9507, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.3268, 8.4371, 8.3660, 8.2954, 8.2252, 8.1556, 8.2652, 8.1960,\n 8.1273, 8.0591, 8.1679, 8.2762, 8.3840, 8.3161, 8.2486, 8.1817,\n 8.2887, 8.3952, 8.3286, 8.2624, 8.1966, 8.3024, 8.2370, 8.3423,\n 8.2773, 8.3820, 8.4862, 8.5899, 8.5252, 8.4608, 8.5640, 8.5000,\n 8.6026, 8.5390, 8.4757, 8.5778, 8.6794, 8.7805, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What's the best way to spend a long weekend?\nQuestion 2: How do I spend my long weekend in an effective way?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.5700, -2.6047, -2.6393, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 6.9631, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.4536, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.3496, 10.2554, 10.3695, 10.4829, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.4524, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.7215, 13.6429, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: HTTP sites are not working while HTTPS sites are working in Google Chrome? What are some solutions?\nQuestion 2: Why are the HTTPS sites not working?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "30.1%", + "z-score": "1.08", + "p value": "0.141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.8444, 1.0328,\n 0.9623, 1.1476, 1.0773])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "64.4%", + "z-score": "12.7", + "p value": "3.62e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.0629, 9.1924,\n 9.0863, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.5955, 10.5025, 10.6145,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.2142, 11.3204, 11.2316, 11.1435, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.8210, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.0893, 12.0114,\n 11.9340, 11.8571, 11.9534, 12.0493, 12.1447, 12.0685, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.6841])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do people join ISIS?\nQuestion 2: Why do people join ISIS?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -1.8598, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.4829, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.6481, 10.5621, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.6412, 11.5613, 11.6606, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.4713, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.5221, 12.6153, 12.7082, 12.6323, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 12.9410, 13.0316, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Has Ancient Sumer been scientifically tested?\nQuestion 2: Has Ancient History been scientifically tested? Is it all real? Did it happen differently than we were told it did? Did it even happen at all?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "34", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "23.5%", + "z-score": "-0.198", + "p value": "0.578", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "144", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "61.1%", + "z-score": "10", + "p value": "7.07e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 10.0906, 10.0074])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some of the most beautiful houses in the world?\nQuestion 2: Which are some of the most beautiful houses around the world?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "71", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-0.754", + "p value": "0.774", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.0401, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.0553, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.9332, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the options if my daughter not got seat in IIT or NIT. At present she is under coaching in Allen, Kota. This year she is in class 11th.?\nQuestion 2: Who is won indutal medal?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 7.3208, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.3871, 9.2536,\n 9.1225, 8.9935, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.8590, 9.9863, 10.1124, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.6232, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.6894, 11.5866, 11.6966, 11.5950,\n 11.4945, 11.6041, 11.5048, 11.4065, 11.3091, 11.2127, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 12.0185, 11.9341, 12.0341, 12.1335, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.1805, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.8928, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the best lesson we should learn from life?\nQuestion 2: What's the most important lesson about life?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "30.1%", + "z-score": "1.01", + "p value": "0.155", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "53.3%", + "z-score": "6.75", + "p value": "7.21e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 4.9075,\n 5.0844, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.7382, 5.6401, 5.7955, 5.9491, 5.8522, 6.0041, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.8819, 7.0211, 6.9282, 7.0662, 6.9743, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.7536])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Who are the best technology recruiters in San Diego and Orange County?\nQuestion 2: Which are the best recruiters for technology executives in the san diego Area?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.1172, -2.1602, -2.2030, -2.0412,\n -1.8808, -1.9242, -1.7655, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -1.8821, -1.7424, -1.7817,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.6057, -1.6444, -1.5104, -1.5492, -1.4162, -1.4551, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.1399, -1.0106, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.9620, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 4.8394, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 5.9932, 5.8936, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.8849, 6.7886, 6.6935, 6.5997,\n 6.5069, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.6559, 6.5672,\n 6.4795, 6.3928, 6.5303, 6.6667, 6.8019, 6.9361, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.1605, 7.2904, 7.2058, 7.1220, 7.0391, 6.9570,\n 6.8757, 7.0043, 7.1319, 7.2587, 7.3845, 7.5094, 7.4286, 7.5526,\n 7.6758, 7.5955, 7.7178, 7.6383, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.9660, 8.8874, 9.0000, 9.1119, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.1357, 9.2450, 9.1694, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.5998, 10.5278, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.4765, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the best answer to what's up?\nQuestion 2: What is the best answer for 'Hmmm'?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.4629, 1.4071, 1.5519, 1.4963, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.5848, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.0201, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.2952, 8.0829, 8.2577, 8.0546, 8.2281, 8.0333,\n 7.8445, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.1684, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.4614, 10.3288, 10.4581, 10.5862,\n 10.4565, 10.3287, 10.2030, 10.3310, 10.4579, 10.3347, 10.2132, 10.3397,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.3189, 11.2069, 11.3228,\n 11.4378, 11.3276, 11.2187, 11.1111, 11.2259, 11.3399, 11.2339, 11.1291,\n 11.2427, 11.1392, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.0605, 11.9650, 12.0699, 12.1741, 12.0798,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 12.9249,\n 12.8359, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.4780, 14.5659, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.7673, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: If Trump were elected, would he pardon Edward Snowden?\nQuestion 2: What is Trump's take on Edward Snowden?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "2.0%", + "z-score": "-7.49", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -3.1840, -3.2321, -3.2796, -3.3265, -3.3729, -3.4187, -3.4641,\n -3.5090, -3.5533, -3.5973, -3.6407, -3.6838, -3.7264, -3.7685, -3.8103,\n -3.8517, -3.8927, -3.9333, -3.9736, -4.0135, -4.0531, -4.0923, -4.1312,\n -4.1698, -4.2080, -4.2460, -4.2836, -4.3209, -4.3580, -4.3948, -4.4313,\n -4.4675, -4.5035, -4.5392, -4.5747, -4.6099, -4.6448, -4.6796, -4.7140,\n -4.7483, -4.7823, -4.8161, -4.8497, -4.8831, -4.9163, -4.9493, -4.9820,\n -5.0146, -5.0469, -5.0791, -5.1111, -5.1429, -5.1745, -5.2060, -5.2372,\n -5.2683, -5.2992, -5.3300, -5.3606, -5.3910, -5.4212, -5.4513, -5.4813,\n -5.5111, -5.5407, -5.5702, -5.5995, -5.6287, -5.6578, -5.6867, -5.7155,\n -5.7441, -5.7726, -5.8010, -5.8292, -5.8573, -5.8853, -5.9132, -5.9409,\n -5.9685, -5.9960, -6.0233, -6.0506, -6.0777, -6.1047, -6.1316, -6.1584,\n -6.1851, -6.2116, -6.2381, -6.2644, -6.2907, -6.3168, -6.3429, -6.3688,\n -6.3946, -6.4203, -6.4460, -6.4715, -6.4969, -6.5223, -6.5475, -6.5727,\n -6.5977, -6.6227, -6.6476, -6.6724, -6.6971, -6.7217, -6.7462, -6.7706,\n -6.7950, -6.8192, -6.8434, -6.8675, -6.8915, -6.9155, -6.9393, -6.9631,\n -6.9868, -7.0104, -7.0340, -7.0574, -7.0808, -7.1041, -7.1274, -7.1506,\n -7.1737, -7.1967, -7.2196, -7.2425, -7.2653, -7.2881, -7.3107, -7.3333,\n -7.3559, -7.3783, -7.4007, -7.4231, -7.4453, -7.4676, -7.4897])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.6571, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.2615, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.5435, 5.6986, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.4480, 5.3594, 5.2719, 5.4212,\n 5.3345, 5.4822, 5.3964, 5.3116, 5.4576, 5.6023, 5.7457, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.2222, 6.3595, 6.4957, 6.6308, 6.7648,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.4193, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.7831, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.6963, 8.8095, 8.9221, 9.0340, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.0814, 10.0074, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What would be the scenario of COE placement in Thapar University by 2020 considering the no. of students from CAG and CML too getting pushed in COE?\nQuestion 2: I got selected in Infosys via campus placement in September 2015 and received my letter of intent in June 2016. When can I expect the offer letter?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.1429, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.2756, -0.1374, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.1%", + "z-score": "11.1", + "p value": "8e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.8354,\n 6.7414, 6.6486, 6.7890, 6.9282, 6.8364, 6.9743, 7.1110, 7.0201,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.0296, 7.1605, 7.2904, 7.4193, 7.5472, 7.4625, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.5910, 7.5094, 7.6335, 7.7567,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.3537, 9.4619, 9.5695, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.0814, 10.0074, 10.1106, 10.2132, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.6111, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.0521, 10.9829, 11.0782])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I draw bending moment and shear force diagram for beams?\nQuestion 2: How do I draw shear force and bending moment diagrams (strength of materials)?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.2144, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.2487, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.9599, 2.2011, 2.0889, 2.3238, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.4585, 2.6713, 2.8804, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.8228, 4.0024, 4.1797, 4.0825,\n 3.9869, 4.1612, 4.3333, 4.2385, 4.1451, 4.3146, 4.2222, 4.1312,\n 4.2981, 4.4630, 4.3727, 4.2836, 4.4462, 4.3580, 4.2710, 4.4313,\n 4.5899, 4.7469, 4.9023, 5.0562, 5.2086, 5.3594, 5.2719, 5.4212,\n 5.5690, 5.4822, 5.3964, 5.3116, 5.4576, 5.3736, 5.5181, 5.6614,\n 5.8034, 5.9442, 5.8605, 6.0000, 6.1383, 6.0553, 6.1924, 6.3283,\n 6.4632, 6.5970, 6.5144, 6.6471, 6.5653, 6.4842, 6.4040, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.8439, 6.9714, 6.8922, 6.8138, 6.9402,\n 7.0658, 7.1904, 7.3143, 7.2363, 7.1590, 7.2818, 7.2051, 7.1291,\n 7.2510, 7.3721, 7.2966, 7.4168, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.2024, 8.1282, 8.0546,\n 8.1683, 8.2813, 8.2082, 8.3205, 8.4322, 8.3595, 8.4706, 8.3984,\n 8.3268, 8.4371, 8.5469, 8.4757, 8.5848, 8.6933, 8.8013, 8.9087,\n 9.0155, 9.1218, 9.0510, 9.1567, 9.0863, 9.0164, 8.9469, 8.8780,\n 8.9830, 8.9145, 8.8464, 8.7788, 8.7116, 8.6448, 8.5785, 8.5126,\n 8.4471, 8.5513, 8.4862, 8.5899, 8.6932, 8.7959, 8.7311, 8.8333,\n 8.7689, 8.7048, 8.8065, 8.9077, 8.8439, 8.9446, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is there such a thing as a political center? How does it look like and can it be achieved?\nQuestion 2: There is a good looking guy that acts like he is by boyfriend and that we have a thing. What does it mean?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.5", + "p value": "0.933", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.4976])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "111", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "58.6%", + "z-score": "8.17", + "p value": "1.61e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 2.8868,\n 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641, 3.7808, 4.0825,\n 3.8411, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426, 4.0415, 4.3027, 4.5556,\n 4.8008, 5.0389, 5.2705, 5.4958, 5.3072, 5.1257, 4.9507, 4.7819, 5.0037,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.3199, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.0000, 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425,\n 6.6172, 6.7893, 6.9589, 7.1261, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557,\n 7.2169, 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.2296, 7.3773,\n 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 7.8779, 8.0171, 7.9115,\n 8.0495, 7.9455, 7.8428, 7.7414, 7.8782, 8.0139, 7.9138, 7.8150, 7.7174,\n 7.8520, 7.9853, 7.8889, 7.7937, 7.9259, 8.0571, 8.1873, 8.0931, 8.2222,\n 8.1291, 8.2572, 8.1651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What would happen if water disappeared from the earth?\nQuestion 2: What would happen if ants disappeared from the Earth?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "10", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "10.0%", + "z-score": "-1.1", + "p value": "0.863", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.6192, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 10.9123, 10.8186,\n 10.9291, 10.8363, 10.7444, 10.6534, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.9259, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 10.9906, 10.9091, 11.0125, 10.9317, 10.8515, 10.9545,\n 11.0569, 10.9773, 10.8984, 10.8200, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.9380, 11.8638, 11.9586, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What was your weirdest childhood interest?\nQuestion 2: Who are some artists with interesting or inspiring childhoods?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "62", + "# Tokens in Greenlist": "11", + "Fraction of T in Greenlist": "17.7%", + "z-score": "-1.32", + "p value": "0.907", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -1.8856,\n -1.9630, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "190", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "68.9%", + "z-score": "14", + "p value": "9.01e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 11.8287, 11.9319,\n 12.0345, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.1129, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.5069, 13.5985, 13.6896, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.9700, 13.8904, 13.9797, 14.0687, 13.9897])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What's the easiest way to learn Java programs?\nQuestion 2: What is the easiest way to learn java programming?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.62", + "p value": "0.00437", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.5164, 1.6828, 1.6166, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.8682, 2.0197, 2.1700, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.2813, 2.4271, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.5318, 2.4717, 2.6131, 2.5532, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.6540, 2.5954, 2.7325, 2.6742, 2.6163, 2.5589, 2.5019,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.6014, 2.5456, 2.4902, 2.6224])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.9187, 12.0218, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.5479, 14.6362, 14.7242, 14.6416,\n 14.7293, 14.8167, 14.9037, 14.9903, 15.0766, 15.1625, 15.2481, 15.3333,\n 15.4182, 15.5028, 15.5870, 15.6709, 15.5900, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: \"Is \"\"rightness\"\" just a judgment of an observer or is it ever a property of an action?\"\nQuestion 2: Does the end justify the means?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.5%", + "z-score": "0.165", + "p value": "0.434", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.2937, -2.3462, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.2226, -2.2735, -2.0656,\n -2.1170, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -1.9757, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.5164, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.0370, -1.0844, -0.9258, -0.9734, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, 0.0865, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.2940, 0.2513, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "66.9%", + "z-score": "12.2", + "p value": "1.04e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.5021, 9.6309, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.6995, 9.8237, 9.9469, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.3908, 10.2923, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.3496, 10.2554, 10.3695, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 10.9998, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.2325])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Bihar assembly election 2015: why the NDA- led government was not successful in their approach. In future what things should they keep in mind so that they don't make the same mistake again?\nQuestion 2: Will Narendra Modi win 400+ seat in L S 2019 ?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.6997, 1.6239, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.6854, 1.6164, 1.7865, 1.7178, 1.8856,\n 2.0517, 1.9829, 1.9149, 1.8475, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.9355, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.2952, 2.2323, 2.1700, 2.3190,\n 2.4669, 2.4045, 2.5508, 2.6961, 2.6336, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.4717, 2.4121, 2.5532, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.5954, 2.5373, 2.4797, 2.6163, 2.7520, 2.6943,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.6014, 2.5456, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.6433, 2.5886, 2.5343, 2.6640, 2.7930, 2.9212,\n 2.8666, 2.9938, 2.9394, 2.8853, 2.8316, 2.7783, 2.9040, 2.8508,\n 2.7979, 2.7454, 2.8698, 2.9935, 2.9410, 2.8887, 2.8368, 2.7852,\n 2.9076, 2.8561, 2.9776, 2.9263, 2.8752, 2.9957, 2.9448, 3.0645,\n 3.0138, 2.9633, 2.9132, 2.8633, 2.8137, 2.9320, 2.8825, 2.8333,\n 2.9507, 3.0674, 3.0182, 3.1342, 3.0851, 3.0363, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.1389, 5.9530, 5.7735, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.0553, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.1813, 7.3233, 7.2232, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.0211, 7.1591, 7.0662, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.0657, 6.9768, 6.8889, 7.0231, 6.9361, 7.0692, 6.9830,\n 7.1149, 7.0296, 6.9451, 6.8615, 6.7788, 6.9094, 7.0391, 6.9570,\n 6.8757, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 7.0187, 6.9402,\n 6.8624, 6.7854, 6.9107, 6.8343, 6.9587, 7.0823, 7.0063, 6.9310,\n 6.8564, 6.7823, 6.7089, 6.6361, 6.7584, 6.6861, 6.8075, 6.7358,\n 6.6645, 6.7850, 6.7143, 6.8339, 6.9529, 7.0711, 7.0006, 6.9307,\n 7.0481, 6.9786, 7.0952, 7.0262, 6.9577, 7.0735, 7.1885, 7.3030,\n 7.4168, 7.5299, 7.4616, 7.3937, 7.5061, 7.6179, 7.7291, 7.6615,\n 7.7720, 7.8820, 7.9913, 8.1001, 8.0328, 8.1410, 8.0741, 8.1817,\n 8.2887, 8.2221, 8.3286, 8.4345, 8.3683, 8.4736, 8.4078, 8.3423,\n 8.4471, 8.5513, 8.4862, 8.5899, 8.5252, 8.6284, 8.7311, 8.8333,\n 8.9351, 8.8706, 8.9718, 8.9077, 9.0085, 9.1088, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the best websites for entrepreneurs?\nQuestion 2: What are the best websites for entrepreneur?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.1279, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.7555, 7.6603, 7.5661, 7.4730,\n 7.6064, 7.5143, 7.4233, 7.5556, 7.4655, 7.5967, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.6148, 9.7224, 9.6456, 9.5695, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.5896, 9.6948, 9.6214,\n 9.5485, 9.4761, 9.4042, 9.5089, 9.4375, 9.3665, 9.4707, 9.4002,\n 9.3302, 9.2607, 9.1916, 9.2952, 9.2265, 9.1584, 9.2613, 9.3638,\n 9.2960, 9.3980, 9.3306, 9.2637, 9.3651, 9.4661, 9.5666, 9.5000,\n 9.6000, 9.6996, 9.7987, 9.7325, 9.8311, 9.7653, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the National nanotechnology initiative?\nQuestion 2: What is the lead time for SSN4EGS411 board?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.4071, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.6702, 8.5347, 8.4017, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.1033, 11.2187, 11.1111, 11.0047, 11.1197, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.6894, 11.7992, 11.9083, 11.8058,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.6679, 13.5746, 13.4822, 13.5784, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.1510, 14.0619, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.7002, 14.6126, 14.7023, 14.7916, 14.8804, 14.9689, 15.0570, 15.1448,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.6633, 15.7481, 15.6640, 15.7485, 15.6651, 15.5823, 15.6667,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Chown: invalid user: \u2018ubuntu: ubuntu\u2019?\nQuestion 2: Do any popular Quorans gain financially through Quora?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.4027, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.4174, 0.3698, 0.5069, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.6351, 0.7688, 0.9017, 1.0338, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.4546, 1.4093, 1.5298, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "68.5%", + "z-score": "14.1", + "p value": "1.67e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.7181, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 10.8699,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.4675, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 11.9457, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.7875, 12.8877, 12.7943, 12.7017,\n 12.6099, 12.5188, 12.4286, 12.3391, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.7270, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 13.9262,\n 14.0170, 13.9343, 13.8522, 13.9427, 13.8613, 13.7803, 13.6999, 13.7904,\n 13.7106, 13.8007, 13.7215, 13.8113, 13.9007, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.1764, 14.0986, 14.0214, 14.1091])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are overlap lengths for columns, beam, slab etc?\nQuestion 2: What is the length of rebars on beams between slabs?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.5281, 2.8284,\n 3.1177, 2.9439, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.6210, 7.7555, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.3625, 8.4868, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.2867, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.4916, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 9.8293, 9.9357, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.2132, 10.3154, 10.2419, 10.3435, 10.4447,\n 10.3717, 10.2993, 10.3999, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.9480, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.2171, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What type of government did Sparta have?\nQuestion 2: What types of government did Aristotle want?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.1470, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.7187, -2.7541, -2.7894, -2.8245, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.1813, 7.3233, 7.2232, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.1266, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.2413, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.5088, 8.6241, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.8752, 8.9851, 8.9107, 9.0200, 9.1287,\n 9.2368, 9.3443, 9.2704, 9.1970, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.6850, 9.7886, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.9531, 9.8821, 9.9837, 9.9132, 10.0143, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.4312, 10.5286, 10.6256, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the saddest thing about you, and why?\nQuestion 2: What is the happiest thing about you? What is the saddest thing about you?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "18", + "Fraction of T in Greenlist": "27.7%", + "z-score": "0.501", + "p value": "0.308", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.4829, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.4437, 14.5333, 14.6225,\n 14.7113, 14.7998, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.5792, 15.6640, 15.7485, 15.8327, 15.9165, 16.0000,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is it safe to travel to Italy now?\nQuestion 2: Is it safe to travel through Italy?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "21.5%", + "z-score": "-0.837", + "p value": "0.799", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.1996, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.7414, 6.8819, 6.7890, 6.6973, 6.6066, 6.7456, 6.6559, 6.5672,\n 6.7049, 6.8414, 6.7536, 6.8889, 6.8019, 6.9361, 7.0692, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.6328, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.7610, 8.6783, 8.7952, 8.9113,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.0416,\n 9.9648, 9.8887, 9.8131, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.1846, 10.2872, 10.3893, 10.3154, 10.4170, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.5998, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.3730, 10.4713, 10.4021, 10.3333,\n 10.2650, 10.3628, 10.2949, 10.2273, 10.1602, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is Quora a parallel universe?\nQuestion 2: Are there any parallel universes?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "178", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "27.0%", + "z-score": "0.606", + "p value": "0.272", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.9262, 1.8559, 1.7865, 1.7178, 1.8856,\n 2.0517, 1.9829, 1.9149, 2.0785, 2.0107, 1.9437, 1.8773, 2.0381,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.6008, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.0370, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.6681, 2.8943, 2.7791, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 3.2998, 3.5032, 3.3947,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.4743, 3.6662, 3.5642, 3.7528,\n 3.6522, 3.5533, 3.4562, 3.6407, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.6187, 3.7916, 3.9624, 3.8730,\n 4.0415, 4.2080, 4.3727, 4.5356, 4.6967, 4.6070, 4.7662, 4.9237,\n 4.8347, 4.7469, 4.6603, 4.5747, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.8655, 4.7823, 4.7001, 4.8497, 4.9980, 4.9163, 4.8355, 4.7556,\n 4.6765, 4.8226, 4.7442, 4.8889, 4.8111, 4.9543, 4.8772, 5.0190,\n 4.9424, 5.0829, 5.0070, 5.1461, 5.0707, 4.9960, 4.9221, 4.8488,\n 4.7763, 4.9135, 5.0496, 4.9774, 4.9058, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.4983, 5.6282, 5.7572, 5.8853, 5.8138, 5.9409,\n 5.8698, 5.9960, 6.1213, 6.2458, 6.1750, 6.1047, 6.2282, 6.1584,\n 6.2810, 6.2116, 6.3333, 6.4543, 6.5745, 6.5054, 6.4368, 6.5561,\n 6.6747, 6.7925, 6.7242, 6.8413, 6.9577, 6.8897, 6.8222, 6.9378,\n 6.8707, 6.9856, 7.0998, 7.0330, 6.9667, 7.0801, 7.1929, 7.1270,\n 7.2391, 7.3506, 7.2849, 7.3958, 7.5061, 7.6158, 7.7249, 7.6594,\n 7.5944, 7.7028, 7.6381, 7.5738, 7.6816, 7.7889, 7.8956, 7.8316,\n 7.7679, 7.7047, 7.8107, 7.9162, 8.0212, 8.1258, 8.0627, 8.1667,\n 8.1039, 8.2074, 8.3103, 8.2479, 8.3503, 8.4523, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why is circuit switching important?\nQuestion 2: What is the difference between a virtual circuit and a circuit switch?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.5400, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.0004, 10.9220, 11.0235, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.4674, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.6297, 11.7261, 11.6514, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.0935, 12.0209, 12.1141, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some misconceptions, and truths that are believed about Iran?\nQuestion 2: What are some things people believe about Ireland but are laughably far from the truth?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.9812, 0.9258, 0.8709, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.2060, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.2243, 1.3663, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.7158, 1.6641, 1.7970, 1.7454, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.9837, 1.9327, 1.8821, 2.0105, 1.9599,\n 1.9097, 2.0369, 1.9868, 2.1131, 2.0631, 2.0134, 2.1385, 2.0889,\n 2.0396, 2.1637, 2.1145, 2.0656, 2.1886, 2.1398, 2.0913, 2.2133,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.3098, 2.2618, 2.2141, 2.1667,\n 2.1195, 2.0726, 2.1913, 2.3094, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.3058, 6.1828, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.6307, 9.5400, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.4185, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.5654, 10.4909, 10.4170, 10.3435, 10.4447,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.6990, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why do most Bollywood movies contain too many sex scenes? Is it because the Bollywood audience are so fond of sex? Are they always horny?\nQuestion 2: What are the different types of nuclear families? How do they all differ?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.7127,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.4747, -0.5164, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.0420, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.1392, 11.0368, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.0516, 11.9551, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.4834, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 12.9011, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.3059, 13.2166, 13.3128, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.6990, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.0813, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.4536, 14.3700, 14.4591, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.7348, 14.8219, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.1712, 15.0909, 15.1761, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How should I make myself brave?\nQuestion 2: How can I be more brave?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.3791, 6.2075,\n 6.0412, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.1550, 8.0495, 7.9455, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 9.9278, 10.0389, 9.9542, 9.8702, 9.7869, 9.7044, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.9940, 9.9184, 10.0231, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.4909, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.4047, 11.3333,\n 11.2624, 11.1919, 11.1218, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which is the best Nuru massage parlour in Bangkok?\nQuestion 2: What is Nuru massage?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.01", + "p value": "0.978", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -1.8892, -1.9291, -1.9688, -2.0083])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.7143, 10.8204, 10.7367, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.7050, 11.8014, 11.7261, 11.6514, 11.5771,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the reasons that people dislike Hillary Clinton?\nQuestion 2: Why do so many people say Hillary Clinton is evil?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "106", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-2.36", + "p value": "0.991", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.4010, -2.4462, -2.2646,\n -2.3101, -2.3552])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.5715, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.5393, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.5625, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.4300, 11.5261, 11.6217, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.9273, 11.8551, 11.7833, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do obtain telegram groups link?\nQuestion 2: I created a Telegram group but I could not find the option to Add Admins. Why?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 9.8015, 9.9351, 9.8058, 9.9384, 9.8116,\n 9.9433, 9.8187, 9.9495, 10.0791, 10.2075, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 11.7762, 11.8885, 12.0000, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.5434, 12.6496, 12.7551, 12.6491,\n 12.7542, 12.8586, 12.9624, 13.0656, 12.9616, 12.8586, 12.7567, 12.6557,\n 12.7590, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.6626, 13.7599, 13.8567, 13.9531, 14.0489,\n 14.1442, 14.2390, 14.1429, 14.2374, 14.3314, 14.4250, 14.5181, 14.6107,\n 14.5162, 14.6086, 14.7005, 14.7920, 14.8831, 14.9737, 15.0639, 15.1537,\n 15.2430, 15.3320, 15.2397, 15.3284, 15.2369, 15.3254, 15.2348, 15.3230,\n 15.2332, 15.3211, 15.4087, 15.4959, 15.4072, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.6653, 15.5783, 15.6641, 15.7495, 15.8345, 15.9193, 16.0036,\n 16.0877, 16.1713, 16.2547, 16.3377, 16.4205, 16.5028, 16.5849, 16.6667,\n 16.5819, 16.6634, 16.7447, 16.6607, 16.7417, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: When/how did you realize were not straight?\nQuestion 2: When/how did you realize you were gay/bisexual? Were you in denial?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.7065, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.2147, 9.1101, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.4563, 9.3582, 9.2611, 9.1652,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.2062,\n 10.3191, 10.2287, 10.1391, 10.2514, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 12.0341, 11.9504, 12.0499,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.1805, 12.0990, 12.0180, 11.9377,\n 12.0355, 12.1329, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.5495, 12.4713, 12.5657, 12.4880, 12.5820, 12.5049, 12.4283,\n 12.5221, 12.4460, 12.3705, 12.2954, 12.2209, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: I don't remember my password, phone number nor my login details of my previous Yahoo email account. How can I access it?\nQuestion 2: How do you reset your Yahoo! password?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "177", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "24.9%", + "z-score": "-0.0434", + "p value": "0.517", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.3301, -2.3851, -2.4394, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -2.0135, -2.0667, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.8511, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, 0.0000,\n -0.0434])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "112", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "59.8%", + "z-score": "8.51", + "p value": "8.66e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 2.8868,\n 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094, 2.1004, 2.4495,\n 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284, 2.6558, 2.4910, 2.7778,\n 3.0551, 3.3235, 3.5839, 3.4219, 3.6742, 3.5176, 3.3665, 3.2205, 3.4641,\n 3.3221, 3.5590, 3.4207, 3.2863, 3.1558, 3.0290, 3.2577, 3.1334, 3.3566,\n 3.5753, 3.4528, 3.3333, 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 4.2426,\n 4.1260, 4.3205, 4.5118, 4.7002, 4.5850, 4.4721, 4.3614, 4.5461, 4.7281,\n 4.9075, 5.0844, 5.2590, 5.1490, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.4610, 5.6220, 5.7812, 5.6804, 5.8377,\n 5.9932, 6.1471, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008, 6.5483, 6.6944,\n 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068, 7.4449, 7.5818, 7.7174,\n 7.6210, 7.7555, 7.6603, 7.7937, 7.9259, 8.0571, 8.1873, 8.0931, 8.2222,\n 8.3503, 8.4774, 8.3843, 8.5105])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why does the Malayalam movie Premam create such a buzz?\nQuestion 2: Do Tamils usually watch Malayalam movies?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.6775, -1.5191, -1.5637, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -1.9291, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.2384, -2.1086, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.6571, 4.8394, 5.0190, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.4832, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.2601, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.0662, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.4194,\n 7.3322, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.8808, 8.0042, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.3143, 8.4303, 8.3525, 8.2754,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.0548, 9.1629, 9.2704, 9.1970, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.1932, 9.2990, 9.4042, 9.5089, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.7095, 9.8116, 9.9132, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.6256, 10.7222, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: When should you lose your virginity?\nQuestion 2: Why did you lose your virginity?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "66", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "21.2%", + "z-score": "-0.711", + "p value": "0.761", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.0844, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.9932, 5.8936, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.5069, 6.4153, 6.5569, 6.4663, 6.6066, 6.5169, 6.6559, 6.5672,\n 6.4795, 6.3928, 6.5303, 6.6667, 6.8019, 6.9361, 6.8500, 6.7648,\n 6.8977, 6.8133, 6.7298, 6.6471, 6.7788, 6.6968, 6.8274, 6.9570,\n 7.0857, 7.0043, 6.9237, 7.0513, 7.1779, 7.0980, 7.2236, 7.1443,\n 7.2691, 7.3930, 7.5161, 7.4373, 7.3592, 7.4813, 7.6026, 7.5251,\n 7.6456, 7.7653, 7.6883, 7.8072, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.8153, 7.9318, 7.8571, 7.9729, 7.8988, 8.0139, 7.9403, 8.0546,\n 7.9816, 7.9091, 7.8372, 7.9507, 8.0636, 8.1758, 8.2874, 8.2158,\n 8.1448, 8.2557, 8.1851, 8.1150, 8.0455, 8.1556, 8.0865, 8.1960,\n 8.3050, 8.4133, 8.3446, 8.2762, 8.3840, 8.4911, 8.4232, 8.5298,\n 8.4623, 8.5683, 8.6738, 8.7788, 8.7116, 8.6448, 8.7492, 8.8531,\n 8.7867, 8.8900, 8.9929, 8.9268, 9.0292, 9.1310, 9.0653, 9.0000,\n 8.9351, 9.0364, 9.1372, 9.0726, 9.1730, 9.1088, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is there a correlation between Trump supporters and IQ?\nQuestion 2: What is Donald Trump's IQ?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "25.3%", + "z-score": "0.0634", + "p value": "0.475", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.0634])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.8857, 4.7703, 4.9528, 4.8394, 5.0190, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.1012, 6.0041, 5.9084,\n 5.8139, 5.9641, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.3640,\n 6.5069, 6.4153, 6.5569, 6.4663, 6.6066, 6.5169, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.2443, 7.1563, 7.0692, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.4193, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.4225, 7.3402, 7.4661, 7.5910, 7.5094, 7.6335, 7.7567,\n 7.6758, 7.7981, 7.9196, 7.8393, 7.7597, 7.8803, 8.0002, 7.9212,\n 8.0402, 7.9619, 8.0801, 8.1976, 8.3143, 8.4303, 8.5456, 8.6603,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.0257, 9.1357, 9.0601, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.0194, 10.9480, 10.8770, 10.8064, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.7222, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What the best science documentaries?\nQuestion 2: What are some good science documentaries?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.4289,\n 1.3725, 1.3166, 1.4629, 1.6081, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.6127, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.5055, 1.4570, 1.5848, 1.7119, 1.6632, 1.6148, 1.7408,\n 1.8660, 1.9906, 1.9419, 2.0656, 2.0170, 2.1398, 2.0913, 2.2133,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.3098, 2.2618, 2.3812, 2.5000,\n 2.6182, 2.5700, 2.5220, 2.4744, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.3485, 7.2532, 7.1591, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.7908, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.9184, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why didn't intelligence develop in its highest form in underwater creatures?\nQuestion 2: How did most creatures develop noses? What was the starting point?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.0294, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.4655, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.4518, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.7997, -1.8383, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.4471, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 8.8304, 8.9496, 8.8631, 8.7773,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.7831, 8.7033, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.4513, 9.5577, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.9340, 10.0371, 9.9642, 9.8918, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.0848, 10.0143, 10.1149, 10.0448,\n 9.9752, 10.0753, 10.0061, 9.9374, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.4603, 10.3923, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why do we, as human beings, use water for?\nQuestion 2: What do we use water for?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.5620,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.2955, -0.1684, -0.2100, -0.2513, -0.1253, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 7.8428, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.4474, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.3284, 10.4341, 10.5393, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.8170, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How would you spend the last 10 days of your life?\nQuestion 2: Where do you want to spend your last days of life?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "20.5%", + "z-score": "-0.878", + "p value": "0.81", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -0.8785])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.3503, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.9625, 9.8753, 9.7890, 9.7034,\n 9.6186, 9.7312, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.1695, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 10.9458, 10.8686,\n 10.7920, 10.7159, 10.8170, 10.9176, 10.8421, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.4533, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How many Parrotheads are there in USA and Canada?\nQuestion 2: Which technology will win the OLED vs LCD battle?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.7139, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.6993, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 9.8187, 9.9495, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.2283, 10.3532, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.6232, 10.7429, 10.8616, 10.7518, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.6667, 10.7835, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.7211, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 11.0070, 10.9123, 11.0227,\n 10.9291, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.2142, 11.3204, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.8285,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 14.0000,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why are nice people always left alone?\nQuestion 2: How do I accept that I will always be alone?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.5361, -1.6013, -1.6654, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.2730, -1.1380, -1.1784, -1.2185,\n -1.0849, -0.9520, -0.9925, -1.0328, -1.0729, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.5229, 9.6612, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 10.8328, 10.9560, 11.0782,\n 11.1994, 11.3196, 11.4388, 11.5570, 11.6743, 11.7907, 11.6693, 11.7851,\n 11.6656, 11.7809, 11.8953, 11.7779, 11.8918, 12.0049, 12.1171, 12.2286,\n 12.3393, 12.4491, 12.3350, 12.4444, 12.5531, 12.4409, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 12.9641, 13.0677, 13.1707, 13.0656, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.4694, 13.5693, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.7621, 13.8595, 13.9565, 14.0530, 14.1489, 14.2443, 14.3393, 14.4338,\n 14.5277, 14.4301, 14.5238, 14.6170, 14.7098, 14.8021, 14.8940, 14.9854,\n 15.0763, 15.1669, 15.0715, 15.1618, 15.0674, 15.1574, 15.2470, 15.1537,\n 15.2430, 15.3320, 15.4206, 15.5087, 15.4167, 15.5046, 15.5922, 15.5012,\n 15.4108, 15.4983, 15.5853, 15.6720, 15.7584, 15.6692, 15.7553, 15.8411,\n 15.9264, 16.0115, 15.9235, 16.0083, 16.0928, 16.0057, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.3377, 16.4205, 16.3353, 16.4178, 16.5000,\n 16.5819, 16.6634, 16.5793, 16.6607, 16.7417, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can I join to IB India?\nQuestion 2: How do I get into IB india?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "18.4%", + "z-score": "-1.87", + "p value": "0.969", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.0071, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -2.9542,\n -2.9988, -3.0429, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -3.1493, -3.1914, -3.2332, -3.2746, -3.3156, -3.3564, -3.3968,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.1928, -3.0140, -2.8368,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.6154, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.6128, -2.6536,\n -2.4908, -2.5318, -2.3708, -2.2111, -2.2528, -2.2943, -2.3354, -2.1783,\n -2.2197, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.0761, -1.9245,\n -1.9658, -1.8157, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 6.0883, 5.9628, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.2488, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.0000, 7.9079, 7.8168, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.9113,\n 9.0267, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.6148, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.7380, 9.8433, 9.7688, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.0371, 9.9642, 10.0668, 10.1690, 10.0965,\n 10.0245, 9.9531, 10.0547, 10.1558, 10.2565, 10.3566, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.3120, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some examples of parasitic bacteria?\nQuestion 2: What are some examples of typical bacteria?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.67", + "p value": "2.09e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.4721, 4.3614, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.1219, 4.0205, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.3333, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058,\n 4.8113, 4.9731, 4.8797, 5.0395, 5.1977, 5.3541, 5.2614, 5.4160,\n 5.3243, 5.2338, 5.3865, 5.2970, 5.4480, 5.5976, 5.5088, 5.6569,\n 5.5690, 5.7155, 5.8606, 6.0044, 6.1470, 6.0596, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.8019, 6.9361, 6.8500, 6.7648,\n 6.8977, 7.0296, 6.9451, 7.0759, 7.2058, 7.1220, 7.2508, 7.1678,\n 7.0857, 7.2134, 7.1319, 7.0513, 6.9714, 7.0980, 7.0187, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.2363, 7.1590, 7.2818, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.0070, 8.1229, 8.2381, 8.3526, 8.2772, 8.3910, 8.5041, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 9.0944, 9.2032, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.2276, 9.1567, 9.0863, 9.1915, 9.1215, 9.0520,\n 9.1566, 9.0876, 9.0190, 8.9509, 8.8832, 8.9872, 9.0906, 9.1936,\n 9.1262, 9.2287, 9.1617, 9.0952, 9.1971, 9.1310, 9.2324, 9.3333,\n 9.4338, 9.5338, 9.6334, 9.7325, 9.6666])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some good ways for international students in the USA to overcome culture shock?\nQuestion 2: How can you overcome the depression, homesickness and anxiety of culture shock?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 1.0284, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.4792, 1.4284, 1.3779, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.4397, 1.3950, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.2833, 10.4042, 10.2967, 10.1905,\n 10.0855, 9.9817, 9.8792, 9.7778, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 9.8702, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.8176, 11.9147, 11.8373,\n 11.9340, 11.8571, 11.9534, 12.0493, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.5394, 12.6323, 12.5568, 12.6494, 12.5745, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Do employees at Select Income REIT have a good work-life balance? Does this differ across positions and departments?\nQuestion 2: Do employees at Pennsylvania REIT have a good work-life balance? Does this differ across positions and departments?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "34.7%", + "z-score": "3.15", + "p value": "0.000812", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.5191, 1.4629, 1.6081, 1.5519, 1.4963, 1.6398, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.7018, 1.8411, 1.7864, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 2.0338, 2.1685, 2.1143, 2.0605,\n 2.1938, 2.1401, 2.2723, 2.4037, 2.3500, 2.4803, 2.4267, 2.5560,\n 2.5026, 2.4495, 2.5776, 2.7050, 2.6519, 2.5990, 2.7253, 2.6726,\n 2.7979, 2.9225, 2.8698, 2.9935, 2.9410, 3.0638, 3.0114, 2.9593,\n 3.0811, 3.2023, 3.1502, 3.2705, 3.2186, 3.3381, 3.2863, 3.2348,\n 3.3534, 3.4713, 3.4198, 3.3686, 3.3177, 3.4346, 3.3838, 3.3333,\n 3.4494, 3.3990, 3.3489, 3.2991, 3.2496, 3.2004, 3.1514])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 0.8997, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.1711, 1.1183, 1.0659, 1.0139, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.1587, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.1106, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What if every human can read the other person's thoughts? How would life be in such a scenario?\nQuestion 2: What would happen if every human being can read the thoughts of all other human beings?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.4974, -0.5410, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.1225, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.3086, 9.4425, 9.5751, 9.7065, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.1124, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.6232, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.6667, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.6584, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.7376,\n 12.6492, 12.7476, 12.8456, 12.9430, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.7270, 13.8193, 13.7350, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.0329, 13.9515, 13.8707, 13.9606,\n 14.0502, 14.1393, 14.0593, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which colour of Honda City 2016 is best?\nQuestion 2: Which is better Honda City or Maruti Ciaz?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "24.4%", + "z-score": "-0.183", + "p value": "0.572", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.8165,\n 1.0136, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 5.0037, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 6.8931,\n 7.0379, 6.9378, 6.8391, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.0647, 10.1745, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.0004, 11.1018, 11.0235, 10.9458, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.5868, 11.5109, 11.4356, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.9487, 12.0419, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why do my dreams are not coming true?\nQuestion 2: Can dreams come true?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "2", + "Fraction of T in Greenlist": "12.5%", + "z-score": "-1.15", + "p value": "0.876", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 2.6681, 2.8943, 2.7791, 3.0000,\n 2.8868, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.6904, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.4610, 5.6220, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.6944, 6.5970, 6.5008, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.2960, 7.2029, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.0267, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How will the ban on existing 500 and 1000 rupee note affect India? What are the pros and cons?\nQuestion 2: What are the advantages and disadvantages of 500 and 1000 rupees ban in India?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "12.6%", + "z-score": "-4.05", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.0620, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -2.8402,\n -2.6302, -2.6778, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -3.2116, -3.2525, -3.2931, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.1765, -3.0019, -3.0424, -2.8701, -2.9109, -2.9515,\n -2.9917, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.4780, -3.3282, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.5036, -3.5382, -3.5725, -3.6067, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.6979, -3.7311, -3.7641, -3.7970, -3.8297,\n -3.8623, -3.8947, -3.9269, -3.9590, -3.9910, -4.0228, -3.8838, -3.9158,\n -3.9476, -3.9793, -4.0109, -3.8739, -3.9056, -3.7697, -3.8016, -3.8333,\n -3.8649, -3.8964, -3.9278, -3.9590, -3.9900, -4.0210, -4.0518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 10.2172, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 11.9230, 12.0386, 12.1533, 12.0289, 12.1432, 12.2565,\n 12.1346, 12.2474, 12.3595, 12.2398, 12.1216, 12.0049, 11.8896, 11.7757,\n 11.6631, 11.5519, 11.6652, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.3289, 12.2221, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.4405, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.7622, 12.6635, 12.7660, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.7812, 12.8819, 12.9820, 12.8877, 12.7943, 12.8942,\n 12.8017, 12.9011, 13.0000, 12.9085, 12.8179, 12.9165, 12.8267, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.7690, 12.6830, 12.7802,\n 12.8769, 12.7918, 12.7073, 12.8037, 12.8997, 12.8160, 12.7329, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.4744, 13.5647, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.9332, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the differences between BitBucket and GitHub?\nQuestion 2: What are the pros and cons of GitHub versus Bitbucket?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.4907, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.6785, 7.8065, 7.7192, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.2107, 8.3324, 8.2483, 8.1650,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.5607, 8.4788, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.0601, 9.1694, 9.0944, 9.0200, 8.9461,\n 8.8728, 8.8000, 8.9086, 9.0167, 9.1242, 9.0518, 9.1587, 9.2651,\n 9.1932, 9.2990, 9.2276, 9.3328, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.5038, 9.6069, 9.5369, 9.6394, 9.5698, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.9374, 10.0371, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.3923, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the life lessons that Batman teaches us?\nQuestion 2: What are the life lessons one can learn from Batman?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "91", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.545", + "p value": "0.293", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.5447])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 8.7758, 8.6678, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.9259, 11.0309, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.4244, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.4356, 11.5329, 11.6297, 11.5549, 11.6514, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 12.0529, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.2068, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some neurogaming startups?\nQuestion 2: What are startups?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.4100, -1.4535, -1.4967, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.9445, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.2008, -2.2377, -2.1028, -2.1398, -2.1766, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 7.8598, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.2146, 10.3301,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.2142, 11.1253, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.0499,\n 11.9669, 12.0660, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.4065, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.7532, 12.8464, 12.7688,\n 12.6918, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the remainder when [math]x^{1999}[/math] is divided by [math](x^2-1)[/math] ?\nQuestion 2: A polynomial leaves remainder [math]2[/math] when divided by [math]x-1[/math] and remainder [math]1[/math] when divided by [math]x-2[/math]. If the polynomial is divided by [math](x-1)(x-2)[/math], then what would be the remainder?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.29", + "p value": "0.000508", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.3094, 2.5627, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.6765, 2.5775, 2.4804, 2.3851, 2.2916, 2.4930, 2.4004, 2.3094,\n 2.5064, 2.7005, 2.6098, 2.8006, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.9057, 3.0873, 3.0000, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 2.8518, 2.7717, 2.6928, 2.6148, 2.7854, 2.7080,\n 2.8764, 2.7995, 2.9656, 3.1300, 3.0533, 2.9775, 2.9025, 3.0641,\n 3.2242, 3.1493, 3.0754, 3.0022, 2.9299, 3.0870, 3.0151, 2.9439,\n 3.0989, 3.2525, 3.1814, 3.3333, 3.2627, 3.4130, 3.3428, 3.2733,\n 3.4217, 3.5689, 3.4995, 3.4308, 3.3627, 3.2953, 3.2285, 3.1623,\n 3.0967, 3.0317, 2.9673, 2.9035, 3.0467, 2.9832, 3.1251, 3.0619,\n 3.2025, 3.3420, 3.2788, 3.2161, 3.1539, 3.2918, 3.4286, 3.3665,\n 3.3049, 3.2437, 3.1831, 3.3181, 3.2577, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.5359, 3.4762, 3.6068, 3.5474, 3.4884, 3.6178, 3.7463,\n 3.6874, 3.6289, 3.5708, 3.5131, 3.4558, 3.3989, 3.3424, 3.2863])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "62.9%", + "z-score": "12.2", + "p value": "1.83e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.9586, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.3042, 9.2143, 9.3326, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.5393, 10.6439, 10.5642, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.9380, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.0935, 12.1867])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can mechanical energy be conserved?\nQuestion 2: When and how is mechanical energy not conserved?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -1.9052, -1.9868, -2.0656, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.1260, 0.0838, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.4721, 4.6571, 4.5461, 4.4371, 4.6188,\n 4.7980, 4.6904, 4.5847, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.6867, 7.5967, 7.5076, 7.6376,\n 7.7667, 7.6785, 7.8065, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.6238, 8.5381, 8.4532, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.4916, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.1398, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.8064, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the Sahara, and how do the average temperatures there compare to the ones in the Taklamakan Desert?\nQuestion 2: What is the Sahara, and how do the average temperatures there compare to the ones in the Registan Desert?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.1651, 8.0741,\n 8.2012, 8.1111, 8.0219, 8.1481, 8.0598, 7.9724, 7.8859, 8.0111,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.2232, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.2368, 9.3443, 9.4513, 9.5577, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 10.8702, 10.9685, 10.8961,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.4525, 11.5470, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can gun control prevent assault?\nQuestion 2: Can gun control prevent a robbry?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066, -1.7303, -1.7778, -1.8249, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -1.8808, -1.7217, -1.7655, -1.8091, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.2517, -2.2892, -2.3264, -2.1884, -2.2258, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.2860, -2.3221, -2.3580, -2.2258, -2.0943, -1.9635, -2.0000,\n -2.0364, -2.0726, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.9614, 4.1633, 4.0446, 4.2426, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.3614, 4.5461, 4.7281, 4.9075,\n 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 4.9666, 4.8667, 5.0332, 4.9346, 4.8375, 4.7419, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.0395, 5.1977, 5.3541, 5.2614, 5.1698,\n 5.3243, 5.4772, 5.3865, 5.2970, 5.2086, 5.3594, 5.2719, 5.1855,\n 5.1000, 5.2489, 5.1643, 5.3116, 5.2278, 5.1450, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.4140, 5.5556, 5.4747, 5.3947, 5.3156, 5.4554,\n 5.3769, 5.2992, 5.2223, 5.3606, 5.2842, 5.4212, 5.3455, 5.2705,\n 5.4061, 5.5407, 5.4661, 5.3921, 5.5255, 5.6578, 5.7892, 5.9196,\n 6.0491, 5.9752, 5.9019, 6.0302, 5.9575, 5.8853, 6.0125, 6.1389,\n 6.2644, 6.3892, 6.3172, 6.2458, 6.3694, 6.4923, 6.4213, 6.3509,\n 6.4728, 6.5939, 6.7143, 6.8339, 6.7637, 6.6939, 6.6248, 6.7434,\n 6.6747, 6.6064, 6.7242, 6.8413, 6.9577, 7.0735, 7.1885, 7.1204,\n 7.0527, 7.1670, 7.0998, 7.0330, 7.1465, 7.2594, 7.3717, 7.4833,\n 7.4167, 7.3506, 7.4615, 7.5719, 7.5061, 7.4407, 7.5503, 7.6594,\n 7.7679, 7.8759, 7.8107, 7.9181, 7.8533, 7.9601, 7.8956, 7.8316,\n 7.9377, 8.0433, 8.1485, 8.2531, 8.3572, 8.2933, 8.2298, 8.3333,\n 8.2702, 8.2074, 8.3103, 8.4128, 8.5148, 8.6164, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can I lose 4kg weight?\nQuestion 2: What are the ways of losing weight?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 1.1547, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 1.1333, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.3389, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.5164, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.6513, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.6437, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 1.0106, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "39", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "69.2%", + "z-score": "6.38", + "p value": "8.91e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990, 4.4264,\n 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962, 5.4611, 5.7155,\n 5.9604, 5.6804, 5.4175, 5.1698, 4.9358, 4.7140, 4.9652, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.4958, 5.7155, 5.9297, 6.1389, 6.3434, 6.5433,\n 6.3594, 6.1815, 6.3791])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which is the most independent country?\nQuestion 2: What country has the most attractive women -- either in absolute terms or in density?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.1111, -2.1822, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.5506, -2.5886, -2.4421, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.5460, -2.4099, -2.2744, -2.3110, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "149", + "Fraction of T in Greenlist": "74.9%", + "z-score": "16.2", + "p value": "1.15e-59", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.6684, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.3710, 13.4691, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.6679, 13.7638, 13.8593, 13.7663, 13.8615,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.1458, 14.2388, 14.3313, 14.4234,\n 14.5150, 14.6062, 14.5161, 14.6070, 14.6976, 14.6084, 14.6986, 14.7885,\n 14.8779, 14.9669, 14.8789, 14.9677, 15.0560, 15.1440, 15.2316, 15.1448,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.4062, 15.4922, 15.5778, 15.6631,\n 15.7481, 15.6633, 15.7481, 15.8325, 15.9165, 16.0002, 15.9165, 16.0000,\n 16.0832, 16.0002, 16.0832, 16.1658, 16.2481, 16.1660, 16.2481])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some reviews of www.cosmicpetrochem.com?\nQuestion 2: What is your review of www.buttermyresume.com?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 3.8759, 3.7700, 3.9595, 4.1461, 4.0415,\n 3.9386, 4.1219, 4.3026, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.4083, 4.3146, 4.4820, 4.6476,\n 4.5547, 4.4630, 4.3727, 4.2836, 4.4462, 4.6070, 4.5186, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.2507, 4.1684, 4.3241, 4.2426,\n 4.1621, 4.0825, 4.2359, 4.1569, 4.0788, 4.2303, 4.1528, 4.3027,\n 4.4511, 4.3740, 4.2977, 4.2222, 4.1475, 4.0736, 4.2196, 4.3644,\n 4.5079, 4.4341, 4.5762, 4.7173, 4.6437, 4.5708, 4.7104, 4.6380,\n 4.7763, 4.9135, 4.8414, 4.7700, 4.6992, 4.6291, 4.5596, 4.6949,\n 4.8291, 4.9624, 4.8930, 5.0252, 5.1564, 5.0873, 5.0187, 5.1488,\n 5.0806, 5.2096, 5.3378, 5.2699, 5.2025, 5.1357, 5.0694, 5.0037,\n 5.1303, 5.2560, 5.3810, 5.3153, 5.4393, 5.5626, 5.4971, 5.4322,\n 5.5544, 5.4899, 5.6112, 5.7319, 5.6675, 5.6036, 5.5402, 5.4772,\n 5.4147, 5.5340, 5.6527, 5.7707, 5.7082, 5.8254, 5.9420, 5.8797,\n 5.8179, 5.9336, 5.8721, 5.9871, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.7987, 5.9120, 6.0246, 6.1367, 6.0770, 6.1884, 6.2993,\n 6.2398, 6.1807, 6.2908, 6.2319, 6.3414, 6.4504, 6.3917, 6.3333,\n 6.2753, 6.2177, 6.1604, 6.2684, 6.3758, 6.4828, 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "50.7%", + "z-score": "6.88", + "p value": "2.93e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 6.8354,\n 6.9759, 6.8819, 6.7890, 6.9282, 7.0662, 6.9743, 6.8834, 6.7937,\n 6.7049, 6.8414, 6.9768, 6.8889, 6.8019, 6.7159, 6.6308, 6.7648,\n 6.8977, 6.8133, 6.9451, 6.8615, 6.7788, 6.9094, 7.0391, 6.9570,\n 6.8757, 6.7952, 6.7155, 6.8439, 6.9714, 7.0980, 7.0187, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.0353, 6.9587, 6.8828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I install Windows 10 on a specific hard drive?\nQuestion 2: How do I install Windows 10 on new Hard drive?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "30.1%", + "z-score": "1.01", + "p value": "0.155", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.5347, 8.4017, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.2967, 10.4169,\n 10.5363, 10.4304, 10.3257, 10.4444, 10.5623, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.7066, 10.8215, 10.9355, 10.8353, 10.9488, 11.0615, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.2381, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.3754, 12.4746, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.8037, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.3002, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 13.9113, 13.8333,\n 13.9221, 13.8447, 13.9332, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I create a stage name for myself?\nQuestion 2: How can I create a stage name?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.2857, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.5275, -1.5667,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.7469, 6.8931,\n 7.0379, 6.9378, 7.0812, 6.9824, 6.8849, 7.0268, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.0211, 6.9282, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.5143, 7.4233, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.0882, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.4423, 8.3605, 8.4788, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.6677, 8.5879, 8.5088, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.3212, 9.2450, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.7380, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.7886, 9.7167, 9.8198, 9.9224,\n 9.8510, 9.9531, 9.8821, 9.9837, 10.0848, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.2447, 10.1750, 10.2743, 10.2050, 10.1363, 10.2350, 10.1667,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.3248, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: ? to be deleted\nQuestion 2: Did Hitler underestimate the jewish mafia-nation?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "26.0%", + "z-score": "0.203", + "p value": "0.42", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.0000,\n 0.2027])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.2717, 9.1455, 9.2828, 9.1589, 9.0370,\n 9.1735, 9.0536, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 9.7738, 9.8995,\n 10.0242, 9.9146, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.2061, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.2966, 11.4065, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.8804, 14.7939, 14.8825, 14.7966,\n 14.7113, 14.7998, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.4103, 15.4956, 15.5805, 15.6651, 15.7494, 15.6667,\n 15.7507, 15.8344, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What do Laxmi, Saraswati, and Ganesha symbolize?\nQuestion 2: Why are Laxmi, Saraswati, and Ganesha depicted together?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "166", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.06", + "p value": "0.98", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.5717, -2.6128, -2.6536,\n -2.4908, -2.3293, -2.3708, -2.2111, -2.0526, -2.0948, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.2780, -2.3176, -2.3570, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.3351, -2.1909,\n -2.0476, -2.0866, -1.9445, -1.9837, -2.0226, -2.0613])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.83", + "p value": "4.13e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.0000,\n 2.8868, 3.1027, 2.9913, 3.2026, 3.4101, 3.2998, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.0817, 3.2796, 3.4743, 3.3729, 3.2733, 3.4641,\n 3.3657, 3.5533, 3.4562, 3.6407, 3.5447, 3.4503, 3.3574, 3.5382,\n 3.7166, 3.8927, 4.0667, 4.2385, 4.4083, 4.3146, 4.2222, 4.3894,\n 4.5547, 4.4630, 4.6262, 4.7875, 4.9472, 5.1051, 5.2614, 5.4160,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.7155, 5.8606, 5.7735, 5.6874, 5.8310, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.8605, 6.0000, 6.1383, 6.0553, 6.1924, 6.1101,\n 6.0287, 6.1644, 6.0837, 6.2183, 6.1382, 6.2716, 6.1923, 6.1137,\n 6.2459, 6.3770, 6.5072, 6.4291, 6.5583, 6.4807, 6.6089, 6.7361,\n 6.6591, 6.5828, 6.7090, 6.8343, 6.7585, 6.6833, 6.8076, 6.7330,\n 6.6591, 6.5857, 6.7089, 6.8313, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.2857, 7.4034, 7.3312, 7.4482, 7.5644, 7.4927,\n 7.6082, 7.5369, 7.6517, 7.5809, 7.6950, 7.8084, 7.9211, 8.0333,\n 8.1448, 8.2557, 8.3660, 8.4757, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.4826, 8.5905, 8.6978, 8.6284, 8.7351, 8.8413, 8.7724, 8.8780,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.2265, 9.3295, 9.4320, 9.5341,\n 9.4658, 9.5673, 9.4995, 9.4321, 9.5331, 9.6336, 9.7337, 9.6667,\n 9.7663, 9.8654, 9.7987, 9.8974, 9.8311])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can you grow a tree in zero gravity?\nQuestion 2: How do trees grow in a zero gravity environment?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.9152, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.9739, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.9272, 1.0565, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.2982, 1.4241, 1.3771, 1.3303, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.4878, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.7943, 8.6461, 8.7967, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.6702, 8.8168, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.6786, 9.5534,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.7065, 9.8367, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.7348, 10.8542, 10.7429, 10.8616, 10.7518, 10.8699,\n 10.9870, 10.8790, 10.7722, 10.8889, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.7222, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.5289, 12.6287, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.1453,\n 13.2410, 13.1547, 13.2499, 13.1644, 13.2593, 13.3537, 13.2690, 13.3631,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.0329, 14.1227, 14.0414, 14.1309,\n 14.0502, 14.1393, 14.2282, 14.1482, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What do Russian people think of Gorbachev?\nQuestion 2: What is the perception of Mikhail Gorbachev among Russians today?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "23.3%", + "z-score": "-0.338", + "p value": "0.632", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.1590, 7.0387, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.2674, 11.1810, 11.2857, 11.2001, 11.1151, 11.0309, 11.1352, 11.0517,\n 10.9689, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.2602, 11.3610, 11.2816, 11.3820, 11.3032, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.5329, 11.4581, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.4300, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.53, + "accuracy_with_watermark": 0.51, + "f1_without_watermark": 0.38961038961038963, + "f1_with_watermark": 0.19672131147540983 + } + }, + "validation": { + "results": [ + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why are African-Americans so beautiful?\nQuestion 2: Why are hispanics so beautiful?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.8543, 1.7765, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 2.1918, 2.1167, 2.2902, 2.4618,\n 2.3868, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.7150, 1.6498, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.4931, 1.6514, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 1.8953, 1.8385, 1.9803,\n 2.1210, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.3094,\n 2.4453, 2.3891, 2.3333, 2.4678, 2.4122, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.3262, 2.4578, 2.4037, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.0866, 2.2159, 2.1640, 2.2923, 2.2406, 2.3679, 2.4944,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.7137, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.6323, 2.7541, 2.8752, 2.8245, 2.7741, 2.7240,\n 2.6742, 2.7940, 2.7443, 2.6949, 2.6458, 2.5969, 2.5483, 2.6667,\n 2.6182, 2.5700, 2.6874, 2.8043, 2.9205, 2.8721, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 8.1196, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.9601, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.2674, 11.1810, 11.2857, 11.3899, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.6487, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.9669, 11.8846, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.4223, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.5657, 12.6597, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: I want to pursue PhD in Computer Science about social network,what is the open problem in social networks?\nQuestion 2: I handle social media for a non-profit. Should I start going to social media networking events? Are there any good ones in the bay area?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, 0.0558, 0.2222, 0.3871, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.8868, 1.0235, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.3771, 1.3303, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.1692, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.5388, 10.6404, 10.5654, 10.6665, 10.7671, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.8849, 11.8117, 11.7389, 11.8333,\n 11.9273, 11.8551, 11.7833, 11.8769, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is there a reason why we should travel alone?\nQuestion 2: What are some reasons to travel alone?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -1.0513, -0.8978, -0.7454, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.4312, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.0323, 8.9314, 9.0582,\n 8.9586, 8.8602, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.3249, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.0529, 12.1468, 12.2403, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why are people so obsessed with having a girlfriend/boyfriend?\nQuestion 2: How can a single male have a child?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.2418, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.2151, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -2.8928, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -2.8852, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.6984, -2.7361, -2.7735, -2.6264, -2.6640, -2.5183, -2.5560,\n -2.4116, -2.2680, -2.1254, -2.1640, -2.2024, -2.2406, -2.0998, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.7213, -1.5878, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.2776, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.4738, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.3899, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 12.8546, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.2737, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some good baby girl names starting with D?\nQuestion 2: What are some good baby girl names starting with D or H?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.9%", + "z-score": "-2.31", + "p value": "0.99", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.4121, -2.4531, -2.4938, -2.5342, -2.3764,\n -2.4170, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -3.1038,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.2811, -3.3160, -3.3508, -3.2071,\n -3.2420, -3.2768, -3.3113, -3.1696, -3.0288, -2.8887, -2.7495, -2.7852,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.3221, -2.3580, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.9620, 3.8297, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.3211, 5.4909, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.7242, 5.8835, 5.7812, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.6944, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.0662, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 6.9768, 7.1111, 7.2443, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.6328, 7.7598, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.5607, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.5516, 8.6677, 8.7831, 8.7033, 8.8179, 8.7388, 8.8527,\n 8.7742, 8.6963, 8.8095, 8.9221, 9.0340, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.6148, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.8131, 9.7380, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.9642, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.5833, 10.5128, 10.4427, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.5286, 10.4603, 10.5573, 10.4893, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: I am 25 year old guy and never had a girlfriend. Is this weird?\nQuestion 2: I am 25 years old. I have never had a girlfriend. Is something wrong with me?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "66", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "36.4%", + "z-score": "2.13", + "p value": "0.0165", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.5627, 2.8098, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570, 2.5690, 2.7775,\n 2.6765, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 1.9335, 2.1320])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: \"What does a good answer on Quora look like? What does it mean to \"\"be helpful\"\"?\"\nQuestion 2: How do you write a good answer on Quora?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "14.3%", + "z-score": "-1.96", + "p value": "0.975", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.9614, 4.1633, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 7.2016, 7.0973, 6.9945, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 8.0829, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.4606, 9.3678, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.0183, 10.9301, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.6599, 14.5797, 14.6667,\n 14.7533, 14.6738, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the best things to do in Hong Kong?\nQuestion 2: What is the best thing in Hong Kong?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "19.0%", + "z-score": "-1.09", + "p value": "0.862", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.2572, 8.1651, 8.0741,\n 7.9839, 8.1111, 8.0219, 7.9336, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.7155, 7.6315, 7.5484, 7.6734, 7.5910, 7.5094, 7.6335, 7.5526,\n 7.4724, 7.3930, 7.5161, 7.6383, 7.7597, 7.6808, 7.8014, 7.7232,\n 7.6456, 7.7653, 7.8842, 7.8072, 7.9253, 7.8489, 7.9663, 7.8905,\n 7.8153, 7.9318, 7.8571, 7.9729, 8.0880, 8.2024, 8.1282, 8.0546,\n 8.1683, 8.0952, 8.2082, 8.3205, 8.2479, 8.1758, 8.2874, 8.3984,\n 8.3268, 8.4371, 8.5469, 8.4757, 8.5848, 8.6933, 8.8013, 8.7305,\n 8.8379, 8.9447, 9.0510, 8.9806, 8.9107, 9.0164, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.5369, 9.6394, 9.7415, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.4312, 10.5286, 10.6256, 10.5573, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why is my life getting so complicated?\nQuestion 2: Why is my life so complicated?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, 0.0000, -0.0420, -0.0838, -0.1253, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 7.8320, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 8.9443, 9.0924, 8.9455, 9.0924, 9.2376,\n 9.0947, 9.2387, 9.0990, 9.2418, 9.1051, 9.2469, 9.1130, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.2283, 10.1124, 10.2375, 10.3615, 10.4846, 10.3709,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.4042, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.0254, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.6465, 11.5515, 11.6584, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.2381, 12.1492, 12.2503, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.8037, 12.7199, 12.8160, 12.7329, 12.8285,\n 12.9238, 12.8414, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.3463, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.6789, 13.7679, 13.8564, 13.9446, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why should I crack Jee?\nQuestion 2: Is it possible to get into IIMs with low marks in graduation and 12th?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "21.9%", + "z-score": "-0.608", + "p value": "0.728", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.5304, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.6620, 11.5476, 11.4345, 11.5492,\n 11.6631, 11.7762, 11.8885, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.3299, 12.4370, 12.5434, 12.6491,\n 12.7542, 12.6495, 12.7542, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.3690, 13.2669, 13.1657, 13.2665, 13.3667, 13.4664, 13.5655, 13.4660,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 14.1462, 14.0489,\n 13.9524, 14.0479, 14.1429, 14.2374, 14.3314, 14.2364, 14.3302, 14.4234,\n 14.5162, 14.6086, 14.7005, 14.7920, 14.8831, 14.7899, 14.6976, 14.7885,\n 14.8790, 14.9691, 15.0588, 14.9677, 15.0571, 15.1461, 15.2348, 15.3230,\n 15.4108, 15.4983, 15.5853, 15.4959, 15.4072, 15.4942, 15.5808, 15.6670,\n 15.7529, 15.6653, 15.7509, 15.8362, 15.9211, 16.0057, 16.0900, 16.1739,\n 16.2574, 16.1713, 16.0858, 16.1693, 16.2525, 16.3353, 16.4178, 16.3333,\n 16.4156, 16.4976, 16.5793, 16.6607, 16.7417, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can you TRANSLATE these to English language?\nQuestion 2: Can you translate this from Bengali to English language?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.6547,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.2839, -1.1523, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.6990, 11.8010,\n 11.9024, 12.0032, 11.9181, 12.0185, 11.9341, 12.0341, 12.1335, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.6785, 12.7735, 12.6930, 12.7876, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.8313, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.1233, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.3615, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can anyone tell shall mi mix mobile should be purchased or not?\nQuestion 2: \"What did Voltaire mean when he said, \"\"God is a comedian playing to an audience that is too afraid to laugh\"\"?\"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.0580, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -1.9473, -1.9843, -2.0212, -1.8898, -1.9267, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.6125, -1.6496, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 5.8398, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.7473, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 10.8631, 10.9697,\n 10.8838, 10.7987, 10.7143, 10.6306, 10.7367, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.2467, 12.3428, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which is the best gaming laptop under 40k?\nQuestion 2: Which is the best gaming laptop under 40,000 rs?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.0767, -2.1320, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -3.1038,\n -3.1396, -3.1753, -3.2107, -3.0657, -3.1013, -3.1368, -3.1720, -3.2071,\n -3.2420, -3.2768, -3.3113, -3.3457, -3.3799, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.5424, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.0711,\n 6.8127, 7.0201, 6.7778, 6.9830, 7.1832, 7.3786, 7.1550, 6.9402,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 8.1763, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 9.7989, 9.9352, 10.0701,\n 9.9333, 9.7989, 9.6667, 9.8015, 9.9351, 9.8058, 9.9384, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.7084, 10.8328, 10.7084, 10.5859,\n 10.7098, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.6632, 11.7779, 11.8918, 11.7762, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.3747, 13.2701, 13.1665, 13.2681,\n 13.1657, 13.0643, 13.1657, 13.2665, 13.1665, 13.2669, 13.1680, 13.0699,\n 13.1701, 13.2698, 13.1730, 13.2722, 13.3710, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.4745, 13.5714, 13.4780, 13.3854, 13.4822, 13.5784, 13.4868,\n 13.5827, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.7885,\n 14.7002, 14.7898, 14.8789, 14.9677, 15.0560, 15.1440, 15.0570, 15.1448,\n 15.2321, 15.3191, 15.4057, 15.3198, 15.2345, 15.3210, 15.2364, 15.3226,\n 15.4085, 15.4940, 15.4103, 15.3272, 15.2446, 15.3301, 15.4152, 15.3333,\n 15.4182, 15.5028, 15.4217, 15.5060, 15.5900, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why Tamil Nadu Universities are no good?\nQuestion 2: Why are the 10th students able to score 100% in Tamil Nadu?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.0038, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 4.1265, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.8177, 4.7002, 4.8857, 4.7703, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.2222, 8.1291, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.9285, 9.0453, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 10.0701, 9.9940, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.4525, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is an easy way to clean toothpaste stains from a shirt without washing it?\nQuestion 2: What are some ways to remove hard water stains from dishes?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.5119, 1.6908, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.4863, 1.6498, 1.5852,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.7143, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.6008, 1.7488, 1.8956, 1.8371,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 1.8119, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.8157, 1.7619, 1.7085, 1.6554, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.6732, 1.6230, 1.5731, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.7213, 1.8453, 1.7974, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.8999, 1.8527, 1.9738, 1.9267, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.1779, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.1111, 9.2351, 9.1380, 9.0419, 8.9469,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.8176, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.8771, 11.9730, 11.8973, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.0327, 11.9586, 11.8849, 11.8117, 11.7389, 11.6667,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What was the deadliest battle in history?\nQuestion 2: What was the bloodiest battle in history?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284, 3.1177, 2.9439, 3.2222,\n 3.0551, 3.3235, 3.1623, 3.4219, 3.6742, 3.5176, 3.7626, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.4207, 3.6515, 3.8772, 3.7417, 3.9620, 3.8297, 3.7009,\n 3.9158, 3.7897, 4.0000, 3.8765, 3.7559, 3.9614, 3.8431, 4.0446, 3.9284,\n 4.1260, 4.0119, 4.2060, 4.3970, 4.2844, 4.4721, 4.6571, 4.5461, 4.4371,\n 4.3301, 4.2251, 4.4061, 4.5847, 4.4809, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.4296, 4.3333, 4.2385, 4.1451, 4.3146, 4.2222, 4.3894, 4.2981,\n 4.4630, 4.3727, 4.5356, 4.6967, 4.6070, 4.7662, 4.9237, 4.8347, 4.7469,\n 4.6603, 4.5747, 4.7296, 4.8830, 4.7980, 4.9497, 4.8655, 4.7823, 4.9322,\n 4.8497, 4.9980, 5.1450, 5.0630, 5.2085, 5.1273, 5.2713, 5.1908, 5.3333,\n 5.2535, 5.3947, 5.5348, 5.4554, 5.5942, 5.7318, 5.6530, 5.5750, 5.4977,\n 5.4212, 5.5572, 5.6921, 5.6160, 5.7498, 5.6743, 5.5995, 5.7320, 5.6578,\n 5.7892, 5.7155, 5.6424, 5.7726, 5.7001, 5.8292, 5.7572, 5.8853, 5.8138,\n 5.9409, 6.0671, 5.9960, 6.1213, 6.2458, 6.1750, 6.1047, 6.0351, 5.9660,\n 6.0892, 6.2116, 6.1429, 6.2644, 6.1961, 6.1283, 6.2489, 6.1815, 6.3013,\n 6.4203, 6.3532, 6.4715, 6.4048, 6.5223, 6.4559, 6.5727, 6.5067, 6.6227,\n 6.7380, 6.6724, 6.7869, 6.9009, 6.8355, 6.7706, 6.7061, 6.6421, 6.7551,\n 6.8675, 6.8037, 6.9155, 6.8520, 6.7890, 6.9000, 6.8373, 6.9477, 6.8853,\n 6.8233, 6.9330, 6.8713, 6.9803, 6.9190, 7.0273, 6.9663, 7.0741, 7.1813,\n 7.1205, 7.2272, 7.3333, 7.2728, 7.2125, 7.1527, 7.0932, 7.1985, 7.3034,\n 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are your views about demonetisation in India?\nQuestion 2: What do you think about the ban on 500 and 1000 denomination notes in India?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.3482, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.2801, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 7.6120, 7.8003, 7.5697, 7.7567,\n 7.9398, 7.7232, 7.5144, 7.3131, 7.1187, 7.3054, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.2121, 8.0928, 8.2369, 8.1196, 8.0042,\n 8.1471, 8.0335, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.3217, 8.2178, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.1176, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 8.9178, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.7610, 8.6783, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.7033, 8.8179, 8.9319, 9.0452,\n 8.9660, 9.0786, 9.1905, 9.1119, 9.2232, 9.1452, 9.2559, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.4299, 9.3537, 9.2782, 9.2032, 9.3113,\n 9.2368, 9.3443, 9.2704, 9.1970, 9.3040, 9.2311, 9.1587, 9.0869,\n 9.1932, 9.2990, 9.2276, 9.1567, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.6394, 9.7415, 9.6719, 9.7735, 9.7043,\n 9.8054, 9.9060, 10.0061, 9.9374, 9.8691, 9.8012, 9.7337, 9.8333,\n 9.7663, 9.8654, 9.7987, 9.7325, 9.8311, 9.7653, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the difference between polyester 210T and 210D?\nQuestion 2: What is the difference between cotton and polyester?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.4576, -1.5119, -1.5656, -1.6187, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "48.5%", + "z-score": "7.54", + "p value": "2.28e-14", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.8857, 4.7703, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.3211, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.3245, 5.2281, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.4160,\n 5.5691, 5.4772, 5.3865, 5.5377, 5.4480, 5.3594, 5.5088, 5.6569,\n 5.5690, 5.4822, 5.3964, 5.3116, 5.4576, 5.3736, 5.2906, 5.2085,\n 5.3526, 5.2713, 5.4140, 5.3333, 5.2535, 5.1745, 5.0964, 5.2372,\n 5.3769, 5.2992, 5.4377, 5.5750, 5.4977, 5.4212, 5.5572, 5.6921,\n 5.8260, 5.7498, 5.6743, 5.5995, 5.5255, 5.4521, 5.5842, 5.7155,\n 5.6424, 5.7726, 5.9019, 6.0302, 5.9575, 6.0848, 6.2113, 6.3369,\n 6.2644, 6.3892, 6.5130, 6.4409, 6.5639, 6.6861, 6.8075, 6.9282,\n 7.0481, 6.9762, 6.9048, 6.8339, 6.7637, 6.8825, 6.8127, 6.7434,\n 6.8614, 6.7925, 6.7242, 6.6564, 6.5891, 6.7060, 6.8222, 6.9378,\n 6.8707, 6.9856, 7.0998, 7.0330, 7.1465, 7.0801, 7.1929, 7.1270,\n 7.2391, 7.3506, 7.4615, 7.3958, 7.5061, 7.4407, 7.3758, 7.3113,\n 7.4208, 7.5297, 7.4655, 7.5738, 7.6816, 7.6177, 7.5542, 7.4911,\n 7.4283, 7.5353, 7.4729, 7.4109, 7.5173, 7.6231, 7.5614, 7.6667,\n 7.6052, 7.5441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is FMS Delhi a good option for a MBA executive program?\nQuestion 2: Is FMS a good choice for an MBA in HR? Why and why not?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.3420, -3.1779, -3.2161, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.0089, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -3.0792,\n -3.1165, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.5036, -3.5382, -3.5725, -3.6067, -3.6407, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.4879, -3.3457, -3.3799, -3.4140, -3.2733, -3.3075,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.5433, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.0226, 7.1857, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.8498, 10.7517,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.4286, 12.5289, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.2046, 14.1227, 14.2121, 14.1309,\n 14.0502, 13.9700, 14.0593, 14.1482, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.6027, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is it a bad time to buy a condo or a house in the Bay Area in 2017?\nQuestion 2: Would 2017 be a good time to buy a house in Bay Area?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.3566, 3.2348, 3.1160, 3.0000,\n 2.8868, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.7700, 3.9595, 4.1461, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 4.9666, 5.1333, 5.2981, 5.4610, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.4832, 5.3867, 5.2915, 5.1977, 5.1051, 5.2614, 5.4160,\n 5.3243, 5.4772, 5.3865, 5.5377, 5.4480, 5.5976, 5.7458, 5.6569,\n 5.5690, 5.7155, 5.6285, 5.7735, 5.9172, 6.0596, 5.9732, 6.1143,\n 6.2541, 6.3928, 6.5303, 6.4444, 6.3595, 6.2755, 6.4116, 6.5465,\n 6.6804, 6.8133, 6.9451, 6.8615, 6.7788, 6.9094, 6.8274, 6.9570,\n 6.8757, 6.7952, 6.7155, 6.6365, 6.5583, 6.4807, 6.4039, 6.5320,\n 6.4558, 6.5828, 6.7090, 6.8343, 6.7585, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.5857, 6.5130, 6.6361, 6.5639, 6.4923, 6.4213, 6.3509,\n 6.2810, 6.2116, 6.1429, 6.0746, 6.0069, 6.1283, 6.0609, 5.9941,\n 5.9279, 6.0481, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.5967, 5.5340, 5.4718, 5.4100, 5.3487, 5.4670, 5.5846, 5.5234,\n 5.6403, 5.7565, 5.6955, 5.6349, 5.7503, 5.6899, 5.6300, 5.5705,\n 5.6849, 5.6256, 5.5668, 5.5082, 5.4501, 5.3923, 5.3349, 5.4480,\n 5.3909, 5.3340, 5.4464, 5.3898, 5.3335, 5.4451, 5.5562, 5.6667,\n 5.6104, 5.7203, 5.6643, 5.6085, 5.5532, 5.4981, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How is vanilla extract made?\nQuestion 2: How do you make sugar cookies without vanilla extract?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.3386, 9.4705, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 10.7835, 10.8995, 10.7955, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 12.0357, 11.9370, 12.0433,\n 12.1491, 12.0516, 12.1568, 12.0605, 12.1652, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.1050, 13.2025, 13.2995,\n 13.2093, 13.3059, 13.2166, 13.3128, 13.2243, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.1781, 14.2686, 14.3587, 14.2744,\n 14.3642, 14.2805, 14.3700, 14.2870, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.1761, 15.2609, 15.3454, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the Sahara, and how do the average temperatures there compare to the ones in the Patagonian Desert?\nQuestion 2: What is the Sahara, and how do the average temperatures there compare to the ones in the Registan Desert?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "28.9%", + "z-score": "1.12", + "p value": "0.131", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, 0.1873, 0.4623, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.1615, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.2010, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.6783, 0.6276, 0.7698,\n 0.9110, 1.0512, 1.0000, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.4655, 7.5967, 7.5076, 7.6376,\n 7.5494, 7.4622, 7.3758, 7.5048, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.5607, 8.4788, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.6677, 8.7831, 8.8978, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.3979, 9.3212, 9.4299, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.1846, 10.2872, 10.2132, 10.1398, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.2273, 10.1558, 10.0848, 10.0143, 9.9442, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.0371, 9.9687, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.2273, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why was the Roman Empire so successful?\nQuestion 2: What are some of the rarely known facts about the Roman Empire?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "41", + "# Tokens in Greenlist": "11", + "Fraction of T in Greenlist": "26.8%", + "z-score": "0.271", + "p value": "0.393", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "47.7%", + "z-score": "7.41", + "p value": "6.42e-14", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641, 3.2206, 3.5382,\n 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998, 3.1177, 3.3968, 3.2222,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.8577, 3.1156, 3.3665, 3.6108, 3.8490,\n 3.7017, 3.5590, 3.4207, 3.2863, 3.1558, 3.3853, 3.6098, 3.4816, 3.7009,\n 3.5753, 3.4528, 3.3333, 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284,\n 4.1260, 4.0119, 3.9001, 3.7905, 3.6831, 3.8759, 4.0657, 4.2528, 4.1461,\n 4.3301, 4.5115, 4.4061, 4.5847, 4.4809, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.4296, 4.3333, 4.2385, 4.1451, 4.0531, 3.9624, 3.8730, 3.7849,\n 3.6979, 3.8657, 3.7796, 3.9452, 3.8600, 3.7758, 3.6927, 3.8555, 4.0166,\n 4.1761, 4.3339, 4.4901, 4.6448, 4.5611, 4.4783, 4.3966, 4.3158, 4.4680,\n 4.3879, 4.5384, 4.6876, 4.6079, 4.5291, 4.6765, 4.8226, 4.9675, 5.1111,\n 5.2535, 5.1745, 5.0964, 5.0190, 5.1597, 5.2992, 5.4377, 5.3606, 5.4977,\n 5.4212, 5.3455, 5.4813, 5.4061, 5.5407, 5.4661, 5.5995, 5.5255, 5.6578,\n 5.7892, 5.7155, 5.8458, 5.7726, 5.7001, 5.6282, 5.5570, 5.4863, 5.6150,\n 5.7429, 5.8698, 5.9960, 6.1213, 6.0506, 5.9805, 5.9109, 6.0351, 5.9660,\n 5.8974, 6.0205, 5.9524, 6.0746, 6.0069, 5.9397, 5.8730, 5.8068, 5.7411,\n 5.8621, 5.9822, 6.1017, 6.2205, 6.3385, 6.2728, 6.2075, 6.1427, 6.2598,\n 6.1954, 6.3117, 6.4274, 6.3632, 6.4781, 6.4143, 6.3509, 6.2879, 6.2253,\n 6.1632, 6.2770, 6.3902, 6.5029, 6.6150, 6.7264, 6.6642, 6.6024, 6.5410,\n 6.6517, 6.5906, 6.7006, 6.8101, 6.9190, 7.0273, 6.9663, 7.0741, 7.0133,\n 7.1205, 7.2272, 7.3333, 7.2728, 7.2125, 7.3180, 7.2581, 7.3631, 7.4676,\n 7.4078])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is it advisable to sent your cv to a professor with whom you are trying to make contact for graduate admission?\nQuestion 2: How does contacting a professor before applying factor into graduate admissions?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -1.9868, -1.5492, -1.6378, -1.2309, -1.3242, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 1.2366, 1.4434,\n 1.6471, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.9829, 1.9149, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.7143, 1.8716, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.2540, -0.2955, -0.1684, -0.2100, -0.0838, -0.1253, 0.0000,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.4294, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 7.9196, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.8529, 8.7600, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.6566, 9.7725, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.8753, 9.7890, 9.9015,\n 10.0133, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 11.0569, 10.9773, 10.8984, 10.8200, 10.9220, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 11.0177, 11.1173, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Does a RainTPL syntax extension for Sublime Text exist? Where can I download it?\nQuestion 2: How do I hide the find/replace bar in Sublime Text 2?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.0494, 1.9245, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 2.1602,\n 2.0647, 1.9711, 1.8791, 2.0870, 1.9959, 2.1997, 2.4004, 2.3094,\n 2.5064, 2.7005, 2.8919, 2.8006, 2.7107, 2.8983, 2.8093, 2.9938,\n 2.9057, 2.8189, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.4703, 2.6296, 2.7875, 2.9439,\n 2.8735, 2.8039, 2.9582, 3.1111, 3.0415, 2.9726, 3.1236, 3.0551,\n 2.9872, 2.9200, 2.8534, 3.0019, 3.1492, 3.0827, 3.0168, 2.9515,\n 3.0967, 3.0317, 2.9673, 2.9035, 3.0467, 2.9832, 3.1251, 3.2660,\n 3.2025, 3.1395, 3.0770, 3.0151, 3.1539, 3.2918, 3.2299, 3.1685,\n 3.1076, 3.2437, 3.1831, 3.1229, 3.2577, 3.3915, 3.5245, 3.6566,\n 3.7878, 3.9181, 3.8571, 3.9865, 4.1150, 4.0541, 3.9936, 4.1210,\n 4.0608, 4.0011, 3.9418, 3.8829, 3.8244, 3.7664, 3.7087, 3.6515,\n 3.7766, 3.9010, 3.8438, 3.7870, 3.7306, 3.8538, 3.7975, 3.7417,\n 3.8638, 3.9853, 3.9294, 4.0501, 4.1700, 4.2893, 4.4080, 4.5260,\n 4.6434, 4.5871, 4.5311, 4.4754, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.1487, 4.0953, 4.0423, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.2805, 4.2280, 4.1758, 4.2889, 4.2369, 4.1851, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.0034, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.7416, 6.6454, 6.7886, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.2001, 7.3333, 7.2443, 7.3765, 7.5076, 7.4194,\n 7.5494, 7.4622, 7.3758, 7.5048, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.6619, 9.5840, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.0414, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the average salary of a microbiologist in India?\nQuestion 2: What is the salary for a psychologist in india?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.3333, -0.3797, -0.4257, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -0.9520, -0.9925, -1.0328, -1.0729, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.0064, 8.1684, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.2132, 10.3397,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.5492,\n 11.6631, 11.5519, 11.6652, 11.5556, 11.6683, 11.5601, 11.6723, 11.7838,\n 11.8944, 11.7881, 11.6829, 11.7932, 11.9029, 11.7992, 11.9083, 12.0167,\n 11.9144, 11.8132, 11.9213, 12.0286, 12.1353, 12.0357, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.4689, 12.5717, 12.4759,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.5930, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.5188, 12.4286, 12.3391, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.1129, 13.2068, 13.3002, 13.3933, 13.3113, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.2542, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.5897, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do you close a Bank of America account?\nQuestion 2: How can one close a bank account online?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.5068, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.6828, 1.6166, 1.5511, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.6823, 1.6186, 1.7778, 1.9355, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.2952, 2.2323, 2.3817, 2.3190,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.1858, 2.1268, 2.2699, 2.2111, 2.3529, 2.4938, 2.4348, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.2852, 2.2287, 2.3657, 2.3094,\n 2.2535, 2.3891, 2.3333, 2.4678, 2.6014, 2.5456, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.3500, 2.4803, 2.4267, 2.3735,\n 2.5026, 2.4495, 2.5776, 2.7050, 2.6519, 2.7783, 2.7253, 2.6726,\n 2.6203, 2.5683, 2.5166, 2.4653, 2.5898, 2.5386, 2.4877, 2.6112,\n 2.5604, 2.6830, 2.8050, 2.7541, 2.8752, 2.8245, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.5754, 2.6949, 2.6458, 2.5969, 2.7154, 2.6667,\n 2.7844, 2.9016, 2.8528, 2.9692, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.1813, 7.0812, 7.2232, 7.3638, 7.2650, 7.1674, 7.0711,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.2960, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 7.8065, 7.7192, 7.8463, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.0882, 8.2107, 8.3324, 8.2483, 8.1650,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.7831, 8.7033, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.1694, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.6532, 9.7574, 9.8611, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.8505, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can I create the most popular question on Quora?\nQuestion 2: Which are the most popular questions on Quora presently that also create the most discussions?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.6550, -2.6961, -2.7369, -2.5717, -2.6128, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.6960, -2.5456, -2.5841, -2.6224,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 8.7419, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.4285, 8.5672, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 10.8012, 10.7074, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.8025, 10.7141, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 12.1329, 12.0532, 12.1502, 12.0712, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.5049, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 12.9087, 13.0000,\n 12.9247, 12.8499, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the requirements to become president in the United States and how are the requirements different in Tanzania?\nQuestion 2: What are the requirements to become president in the United States and how are the requirements different in France?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.0792, 2.9424, 2.8098, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.9614, 3.8431, 3.7273, 3.6141, 3.8146, 3.7033,\n 3.9001, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.8669, 4.7610, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.2281, 5.1332, 5.2915, 5.1977, 5.1051, 5.0138, 5.1698,\n 5.0795, 5.2338, 5.3865, 5.2970, 5.4480, 5.5976, 5.5088, 5.4212,\n 5.5690, 5.7155, 5.8606, 6.0044, 5.9172, 5.8310, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.8605, 5.7778, 5.6959, 5.8351, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.2991, 6.2183, 6.3517, 6.2716, 6.1923, 6.1137,\n 6.2459, 6.1680, 6.2990, 6.4291, 6.3517, 6.4807, 6.6089, 6.5320,\n 6.4558, 6.5828, 6.7090, 6.8343, 6.9587, 6.8828, 6.8076, 6.9310,\n 6.8564, 6.7823, 6.7089, 6.8313, 6.7584, 6.6861, 6.8075, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.2136, 7.1421, 7.2596, 7.1886, 7.1181,\n 7.0481, 7.1647, 7.0952, 7.2111, 7.3263, 7.2572, 7.3717, 7.4855,\n 7.4168, 7.3485, 7.4616, 7.5740, 7.6859, 7.7971, 7.7291, 7.6615,\n 7.7720, 7.7048, 7.6381, 7.5719, 7.6816, 7.6158, 7.5503, 7.6594,\n 7.7679, 7.8759, 7.9833, 8.0902, 8.0249, 7.9601, 8.0663, 8.0018,\n 7.9377, 7.8740, 7.9796, 7.9162, 8.0212, 8.1258, 8.0627, 8.1667,\n 8.2702, 8.2074, 8.1449, 8.2479, 8.3503, 8.4523, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: \"Who is the Adam the lyrics to Blink-182's \"\"Adam's Song\"\" are written about?\"\nQuestion 2: \"Which Blink-182 band member wrote the lyrics to \"\"Adam's Song\"\"?\"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "72", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "29.2%", + "z-score": "0.816", + "p value": "0.207", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.5514, 6.7006, 6.5993, 6.7469, 6.6469,\n 6.7931, 6.6944, 6.5970, 6.7416, 6.6454, 6.7886, 6.9305, 6.8354,\n 6.9759, 6.8819, 6.7890, 6.9282, 6.8364, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.2884, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.2904, 7.2058, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.1192,\n 8.2375, 8.1585, 8.2760, 8.3927, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.5052, 8.4286, 8.5424, 8.4664, 8.3910, 8.5041, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 8.9107, 9.0200, 9.1287,\n 9.0548, 8.9815, 9.0895, 9.0167, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.1932, 9.2990, 9.4042, 9.5089, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.7367, 9.6684, 9.7690, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.1970, 10.2949, 10.3923, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What books should I read as an aspiring entrepreneur?\nQuestion 2: What are the top books an aspiring teen entrepreneur should read?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.0247, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.3190,\n -2.3619, -2.4045, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.4678, -2.5068, -2.5456, -2.3962, -2.2478,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.2680, -2.1254, -2.1640, -2.2024, -2.2406, -2.0998, -2.1381,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.5618, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.4803, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.7473, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.7091, 9.8236, 9.9373, 10.0504, 9.9625, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.9278, 10.0389, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.0611, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 10.8984, 10.8200, 10.9220, 10.8443, 10.9458, 10.8686,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.6179, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why does a rainbow lorikeet make a good pet?\nQuestion 2: Can lorikeets be good pets? What are some ways to take good care of them?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.9867, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 1.0598, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909,\n 1.0215, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.4403, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.3284, 1.4570, 1.5848, 1.5363, 1.4881, 1.4402, 1.5667,\n 1.6925, 1.8175, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.5298, 1.4846, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which sector (i.e. primary, secondary, tertiary) contributes the most to the GDP of India? Why?\nQuestion 2: What is my contribution to the GDP?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.3%", + "z-score": "-0.864", + "p value": "0.806", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.3333, 0.5298, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.7396, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.9897, -0.8638])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 4.6188,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.4285, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.0855, 9.9817, 9.8792, 9.7778, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.7447, 9.6490, 9.5543, 9.4606, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.7622, 9.8753, 9.7890, 9.9015,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.4407, 10.5475, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.2816, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.7604, 11.8571, 11.9534, 12.0493, 12.1447, 12.0685, 12.1635, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.2598, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: If universe is expanding without a limit and dark and vacuum energy are created as it expands\u2026?\nQuestion 2: If universe can expand without limit and it creates dark/vacuum/gravitational energy with it,then is the potential energy infinite?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "19.3%", + "z-score": "-1.2", + "p value": "0.886", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.0949, 9.2424, 9.3881, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.0750, 10.2093, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.4614, 10.3288, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.7084, 10.5837, 10.7084, 10.5859,\n 10.7098, 10.8327, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.4311, 11.5476, 11.4311, 11.5470, 11.4323, 11.3189, 11.4345, 11.5492,\n 11.6631, 11.7762, 11.6652, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 11.8982, 12.0077, 12.1164, 12.2244, 12.1200, 12.2275,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.5604, 12.4625, 12.3655, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.4834, 12.5853, 12.6867, 12.5930, 12.6939, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.2093, 13.3059, 13.4021, 13.3128, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.7926, 13.8857, 13.9784, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.0813, 14.1725, 14.2633, 14.1781, 14.2686, 14.1842, 14.2744,\n 14.3642, 14.2805, 14.3700, 14.4591, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.9037, 14.9903, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.0858, 15.1712, 15.2563, 15.1761, 15.2609, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How is India preparing for war against Pakistan?\nQuestion 2: Is India preparing for a war?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.5453, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "201", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "46.8%", + "z-score": "7.13", + "p value": "5.15e-13", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712, 3.5796, 3.8497, 3.6667,\n 3.9279, 3.7524, 3.5839, 3.8367, 3.6742, 3.5176, 3.3665, 3.6108, 3.4641,\n 3.3221, 3.5590, 3.4207, 3.2863, 3.1558, 3.3853, 3.2577, 3.1334, 3.0123,\n 3.2348, 3.1160, 3.3333, 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.6831, 3.8759, 3.7700, 3.9595, 3.8552,\n 3.7528, 3.9386, 4.1219, 4.3026, 4.4809, 4.3788, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.6981, 4.8667, 4.7683, 4.6715, 4.8375, 4.7419, 4.9058, 5.0679,\n 4.9731, 5.1332, 5.2915, 5.1977, 5.1051, 5.2614, 5.1698, 5.3243, 5.2338,\n 5.1444, 5.0562, 5.2086, 5.1212, 5.2719, 5.4212, 5.5690, 5.4822, 5.3964,\n 5.3116, 5.4576, 5.3736, 5.2906, 5.4349, 5.5780, 5.4956, 5.4140, 5.3333,\n 5.2535, 5.3947, 5.3156, 5.4554, 5.5942, 5.5155, 5.4377, 5.3606, 5.2842,\n 5.4212, 5.3455, 5.2705, 5.4061, 5.5407, 5.6743, 5.5995, 5.7320, 5.8635,\n 5.7892, 5.9196, 5.8458, 5.7726, 5.7001, 5.8292, 5.7572, 5.6858, 5.8138,\n 5.9409, 6.0671, 5.9960, 6.1213, 6.2458, 6.1750, 6.2985, 6.2282, 6.1584,\n 6.0892, 6.2116, 6.1429, 6.2644, 6.3853, 6.5054, 6.4368, 6.3688, 6.3013,\n 6.4203, 6.3532, 6.2866, 6.4048, 6.5223, 6.6391, 6.5727, 6.5067, 6.6227,\n 6.5571, 6.6724, 6.6072, 6.5424, 6.6568, 6.5924, 6.7061, 6.8192, 6.7551,\n 6.8675, 6.9793, 6.9155, 6.8520, 6.9631, 6.9000, 7.0104, 6.9477, 6.8853,\n 6.8233, 6.9330, 6.8713, 6.9803, 7.0888, 7.1967, 7.1352, 7.0741, 7.0133,\n 7.1205, 7.0601, 7.0000, 7.1065, 7.2125, 7.1527, 7.0932, 7.0340, 6.9752,\n 7.0804, 7.0219, 7.1266])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why we do study computer fundamental in software engineering.?\nQuestion 2: Do we get to chose only one computer language when we are studying engineering?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.2170, -0.0865, -0.1295, 0.0000, 0.1287, 0.0856, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.1640, 5.4175, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 7.8174, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.4705, 9.3550,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.5048, 11.4065, 11.5157, 11.4184, 11.5271, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.2040, 12.3063, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.4437, 14.5333, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.9037, 14.9903, 15.0766, 14.9950, 15.0810, 15.1667,\n 15.0858, 15.0054, 15.0909, 15.1761, 15.2609, 15.3454, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the smartest question besides this question?\nQuestion 2: What are the smartest questions that you have heard?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 6.8458, 6.7333, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.5543, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.3935, 12.4880, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 12.8928, 12.8169, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is a good text editor?\nQuestion 2: What are the best text editor plugins?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.183", + "p value": "0.428", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.4697, -1.5361, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.2349, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.3480, 8.2054, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.0577, 9.9601, 9.8634, 9.7678, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.4935, 11.5966, 11.5111, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.1617, 13.0821, 13.0030, 12.9244, 12.8464, 12.7688,\n 12.8616, 12.9540, 12.8771, 12.9691, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What people who you've never met have influenced your life the most?\nQuestion 2: Who are people you have never met who have had the greatest influence on your life?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.0290, -1.0809, -1.1323,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.2010, -0.2503, -0.0998, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.5811, 5.7382, 5.6401, 5.5435, 5.6986, 5.6032, 5.7566, 5.6622,\n 5.8139, 5.9641, 6.1128, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.5569, 6.4663, 6.3768, 6.5169, 6.6559, 6.5672,\n 6.7049, 6.8414, 6.9768, 7.1111, 7.2443, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.5381, 8.4532, 8.3691,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.7033, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.4765, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How much weed did Bob Marley smoke a day?\nQuestion 2: How much weed should you smoke?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.5475, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.6164, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.9437, 1.8773, 2.0381,\n 1.9720, 1.9066, 1.8419, 2.0000, 2.1567, 2.0918, 2.2468, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.3586, 2.2952, 2.4449, 2.3817, 2.3190,\n 2.2569, 2.4045, 2.5508, 2.4887, 2.6336, 2.5717, 2.5103, 2.4495,\n 2.5925, 2.7344, 2.6735, 2.6131, 2.7534, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.9013, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 1.0328, 0.9870, 0.9415, 0.8963, 1.0215,\n 1.1461, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.7931, 6.6944, 6.5970, 6.7416, 6.6454, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.6973, 6.6066, 6.5169, 6.6559, 6.7937,\n 6.7049, 6.8414, 6.9768, 7.1111, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.7598, 7.6742, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.8808, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.7610, 8.8778, 8.7952, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.9319, 9.0452,\n 9.1577, 9.0786, 9.1905, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.0450, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the next number of this sequence? 5\u2026..17\u2026\u202637\u2026..89\u2026..??\nQuestion 2: What is the next number in the sequence?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "5", + "# Tokens in Greenlist": "1", + "Fraction of T in Greenlist": "20.0%", + "z-score": "-0.258", + "p value": "0.602", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.2488, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.0060, 8.9178, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.5638, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 9.9562, 9.8776, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.1273, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which website is good for downloading Android (.apk) files?\nQuestion 2: Android Application Development: Which software is used to develop APK files?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "71.1%", + "z-score": "13.1", + "p value": "1.4e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.6398, 6.4846, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.6061, 10.7211, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 12.9085, 13.0071, 13.1050, 13.2025, 13.1122])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Are vacuum fluctuations occuring because the energy in vacuum? Are these virtual particles?\nQuestion 2: Do virtual particles and energy in vacuum really exist? Can we observe them? Where? When?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.1654, 8.0064, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.2376,\n 9.3811, 9.2387, 9.3811, 9.2418, 9.3831, 9.2469, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.1333, 9.0068, 9.1455, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.2375, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.5243, 10.4169,\n 10.3109, 10.2061, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.0516, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.7017,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.7376,\n 12.6492, 12.5615, 12.4746, 12.5732, 12.6713, 12.7690, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.2593, 13.3537, 13.4477, 13.3631,\n 13.4567, 13.3728, 13.4661, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.6796, 13.5985, 13.5179, 13.4380, 13.3585, 13.2796,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 14.0000,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I find my Publication ID on Medium?\nQuestion 2: Still not understanding Qoura?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.3850, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.4547, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.9739, 0.9245, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.6598, 0.6170, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321, 1.5403, 1.9052,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.8284, 3.1177, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.5839, 3.8367, 4.0825, 3.9196, 4.1586, 4.0012, 4.2339,\n 4.0814, 4.3083, 4.5301, 4.3818, 4.2378, 4.0980, 3.9620, 3.8297, 3.7009,\n 3.5753, 3.7897, 4.0000, 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712,\n 4.7488, 4.6291, 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190,\n 4.9075, 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.4296, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058, 5.0679,\n 4.9731, 5.1332, 5.2915, 5.1977, 5.1051, 5.0138, 4.9237, 5.0795, 4.9904,\n 4.9023, 4.8154, 4.9691, 4.8830, 4.7980, 4.7140, 4.6311, 4.7823, 4.7001,\n 4.8497, 4.9980, 4.9163, 5.0630, 5.2085, 5.1273, 5.2713, 5.4140, 5.5556,\n 5.6959, 5.6149, 5.5348, 5.4554, 5.5942, 5.5155, 5.4377, 5.3606, 5.4977,\n 5.6338, 5.7689, 5.9029, 6.0359, 6.1680, 6.2990, 6.2217, 6.1451, 6.0693,\n 5.9941, 6.1237, 6.0491, 6.1777, 6.3054, 6.4322, 6.5582, 6.4838, 6.6088,\n 6.7330, 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 6.8799, 7.0007, 6.9282,\n 7.0481, 7.1673, 7.0952, 7.0238, 6.9529, 6.8825, 7.0006, 6.9307, 7.0481,\n 7.1647, 7.0952, 7.2111, 7.3263, 7.4409, 7.3717, 7.4855, 7.5988, 7.7114,\n 7.8233, 7.9347, 7.8657, 7.9764, 8.0865, 8.0178, 7.9497, 8.0591, 7.9913,\n 8.1001, 8.0328, 8.1410, 8.2486, 8.3557, 8.4623, 8.3952, 8.3286, 8.2624,\n 8.3683, 8.3024, 8.4078, 8.5126, 8.4471, 8.3820, 8.3173, 8.2531, 8.3572,\n 8.2933, 8.3969, 8.5000, 8.4364, 8.5390, 8.6411, 8.7427, 8.6794, 8.7805,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How could an HIV-positive person have children?\nQuestion 2: Would you marry an HIV positive person?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.687", + "p value": "0.754", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -0.8165, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 6.9307, 6.7489, 6.5727,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.5258, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.2001, 7.3333, 7.4655, 7.5967, 7.5076, 7.4194,\n 7.5494, 7.6785, 7.5912, 7.5048, 7.6328, 7.7598, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 10.8984, 10.8200, 10.7423, 10.8443, 10.9458, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.4581, 11.5549, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: I'm going to be US President one day. What should I start doing now to achieve this?\nQuestion 2: I'm 16 and I want to become the US president someday. What should I start doing?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.4364,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.3311, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.3109, 0.4428, 0.3974, 0.5283, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.8963, 1.0215,\n 0.9763, 0.9313, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.9620, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.2351, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 12.0341, 12.1335, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.6667,\n 13.7559, 13.6789, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why MS Dhoni leave captaincy of ODI & T-20?\nQuestion 2: Why does M.S Dhoni left captaincy for ODI and T20?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.3825, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.6681, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -2.8333,\n -2.8675, -2.7358, -2.7701, -2.8043, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.1603, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.1471, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.5997,\n 6.5069, 6.4153, 6.5569, 6.6973, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.7049, 6.8414, 6.7536, 6.8889, 6.8019, 6.9361, 7.0692, 6.9830,\n 6.8977, 6.8133, 6.9451, 6.8615, 6.7788, 6.9094, 7.0391, 6.9570,\n 6.8757, 6.7952, 6.9237, 7.0513, 6.9714, 7.0980, 7.0187, 7.1443,\n 7.2691, 7.1904, 7.1125, 7.2363, 7.1590, 7.2818, 7.4039, 7.3271,\n 7.2510, 7.1755, 7.2966, 7.2217, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.2399, 7.1673, 7.2857, 7.4034, 7.3312, 7.4482, 7.3765, 7.4927,\n 7.4215, 7.5369, 7.6517, 7.7658, 7.6950, 7.8084, 7.7380, 7.6681,\n 7.5988, 7.7114, 7.8233, 7.7544, 7.8657, 7.7971, 7.9078, 7.8397,\n 7.9497, 7.8820, 7.9913, 7.9241, 8.0328, 8.1410, 8.0741, 8.0076,\n 7.9415, 8.0490, 7.9833, 7.9181, 8.0249, 8.1312, 8.0663, 8.0018,\n 7.9377, 8.0433, 8.1485, 8.0847, 8.1892, 8.1258, 8.2298, 8.3333,\n 8.2702, 8.2074, 8.3103, 8.2479, 8.3503, 8.4523, 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: are aliens real or are they fake ?\nQuestion 2: Do aliens exists?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.2195, -0.2626, -0.1309, 0.0000,\n 0.1302, 0.2596, 0.2158, 0.3443, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "181", + "Fraction of T in Greenlist": "91.0%", + "z-score": "21.5", + "p value": "1.03e-102", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 9.1551, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 10.1585, 9.9969, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 10.7918, 10.6421, 10.7732, 10.9030, 11.0315, 11.1588,\n 11.2848, 11.4097, 11.5333, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 12.2527, 12.3690, 12.4844, 12.5988, 12.7124, 12.8250, 12.9368, 13.0477,\n 13.1578, 13.2671, 13.3755, 13.4832, 13.5901, 13.6963, 13.8017, 13.9064,\n 14.0104, 14.1137, 14.2163, 14.3183, 14.4196, 14.5202, 14.6202, 14.7196,\n 14.8184, 14.9165, 15.0141, 15.1111, 15.2075, 15.3034, 15.3987, 15.4935,\n 15.5877, 15.6814, 15.7746, 15.8673, 15.9594, 16.0511, 16.1423, 16.2330,\n 16.1133, 16.2040, 16.2941, 16.3838, 16.4731, 16.5619, 16.6503, 16.7382,\n 16.8257, 16.9127, 16.7976, 16.8846, 16.9712, 17.0574, 17.1432, 17.2286,\n 17.3136, 17.3981, 17.4824, 17.5662, 17.6497, 17.7328, 17.8155, 17.8979,\n 17.9799, 18.0615, 18.1429, 18.2238, 18.1153, 18.1962, 18.2768, 18.3571,\n 18.4370, 18.5166, 18.5959, 18.6749, 18.7536, 18.6482, 18.7268, 18.8051,\n 18.8832, 18.9609, 19.0383, 19.1154, 19.1922, 19.2688, 19.3450, 19.4210,\n 19.4967, 19.5721, 19.6472, 19.7221, 19.7967, 19.8710, 19.9451, 20.0189,\n 20.0925, 20.1658, 20.2388, 20.3116, 20.3842, 20.4565, 20.5286, 20.4302,\n 20.5022, 20.5740, 20.6456, 20.7169, 20.7880, 20.8589, 20.9296, 21.0000,\n 21.0702, 21.1402, 21.2099, 21.2795, 21.3488, 21.4179, 21.4868])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Where can I buy special flavor cupcake at Gold Coast?\nQuestion 2: Where can I found different flavours for cupcakes at Gold Coast?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 3.7524, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.2463, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.7594, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.1968, 11.1154, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.0468,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.7017, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Does marijuana cause cancer?\nQuestion 2: How can smoking marijuana give you lung cancer?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "84", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "17.9%", + "z-score": "-1.51", + "p value": "0.935", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.0381, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.5205, -2.5717, -2.3462, -2.3982, -2.4495,\n -2.2299, -2.0135, -1.8000, -1.8543, -1.9081, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.5119])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.0536, 8.9355, 8.8192, 8.7045, 8.8405, 8.7277, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.0488, 11.1614, 11.0615, 10.9626,\n 10.8647, 10.7678, 10.8801, 10.7843, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.7476, 12.8456, 12.7581, 12.6713, 12.5852, 12.4998, 12.5976,\n 12.5129, 12.4289, 12.5264, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.6896, 13.7803, 13.8707, 13.9606,\n 14.0502, 14.1393, 14.0593, 14.1482, 14.2367, 14.3248, 14.2455, 14.3333,\n 14.2546, 14.1764, 14.0986, 14.0214, 14.1091, 14.0324, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I apply for jobs as an international student in the united states?\nQuestion 2: Proteus software Infrared Light Sensors Microcontrollers +1 How can I simulate an IR sensor using Proteus software?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.7158, 1.8490, 1.7970, 1.7454, 1.8773, 1.8257,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.7693, 1.8935, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.9473, 2.0688, 2.1896, 2.1418, 2.0943, 2.0470, 2.1667,\n 2.1195, 2.0726, 2.0259, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.0792, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.6571, 4.5461, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.2981, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.5811, 5.7382, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.2001, 7.3333, 7.2443, 7.3765, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.8406, 7.9649, 8.0882, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.7482, 8.8636, 8.7831, 8.8978, 9.0117, 8.9319, 9.0452,\n 8.9660, 9.0786, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.5695, 9.6764,\n 9.6008, 9.7072, 9.6322, 9.5577, 9.6635, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.0965,\n 10.0245, 9.9531, 10.0547, 9.9837, 10.0848, 10.1855, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.5410, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.6944, 10.6256, 10.7222, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the procedures for becoming an actuary?\nQuestion 2: What is the procedure of becoming an actuary?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.0050, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.7539, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.4345, 11.5492,\n 11.4378, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.6772, 11.7881, 11.8982, 11.7932, 11.6894, 11.5866, 11.6966, 11.8058,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.9771, 12.8819, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.2025, 13.1122,\n 13.0226, 13.1198, 13.0311, 13.1279, 13.2243, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.5250, 13.6188, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.7434, 13.8350, 13.9262,\n 13.8434, 13.7612, 13.6796, 13.7706, 13.6896, 13.7803, 13.8707, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.1573, 14.2455, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are some tips on making it through the job interview process at Access National?\nQuestion 2: What are some tips on making it through the job interview process at eBay?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "6.5%", + "z-score": "-6.02", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.3333,\n -2.3919, -2.4495, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -3.1333, -3.1789, -3.2240, -3.2686, -3.3128, -3.3566,\n -3.4000, -3.4429, -3.4855, -3.5277, -3.5695, -3.6109, -3.6520, -3.6927,\n -3.7331, -3.7732, -3.8129, -3.8523, -3.8914, -3.9302, -3.9687, -4.0069,\n -4.0449, -4.0825, -4.1198, -4.1569, -4.1937, -4.2303, -4.2666, -4.3027,\n -4.3385, -4.3740, -4.4093, -4.2222, -4.2581, -4.2938, -4.1100, -4.1461,\n -4.1821, -4.2178, -4.2532, -4.2885, -4.3235, -4.3583, -4.3928, -4.4272,\n -4.4613, -4.4953, -4.5290, -4.5626, -4.5959, -4.6291, -4.6621, -4.6949,\n -4.7275, -4.7599, -4.7921, -4.8242, -4.8561, -4.8878, -4.9193, -4.9507,\n -4.9820, -5.0130, -5.0439, -5.0747, -4.9108, -4.9419, -4.9729, -4.8113,\n -4.8426, -4.8737, -4.9048, -4.9356, -4.9663, -4.9969, -5.0273, -5.0576,\n -5.0877, -5.1177, -5.1475, -5.1772, -5.2068, -5.2362, -5.2655, -5.2947,\n -5.3237, -5.3526, -5.3814, -5.4100, -5.4385, -5.4670, -5.4952, -5.5234,\n -5.5514, -5.5794, -5.6072, -5.6349, -5.6625, -5.5149, -5.5427, -5.5705,\n -5.4245, -5.4526, -5.4805, -5.5082, -5.5359, -5.5635, -5.5909, -5.6183,\n -5.6455, -5.6727, -5.6997, -5.7266, -5.7535, -5.7802, -5.8068, -5.8333,\n -5.8598, -5.8861, -5.9123, -5.9385, -5.9645, -5.9905, -6.0163])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 9.9863, 9.8716, 9.9980, 9.8852, 9.7738, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.4896, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.4346, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.3042, 9.2143, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.3320, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.7901, 11.7169, 11.6441, 11.5718, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I find out what operating system I have on my Macbook?\nQuestion 2: How do I find out what operating system I have?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.1602,\n -2.2177, -2.2743, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.2299, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -1.1613, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.5387, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.1782,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.2100, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "177", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "72.9%", + "z-score": "14.7", + "p value": "2.73e-49", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.9433, 9.8187, 9.9495, 10.0791, 10.2075, 10.0857, 9.9656, 9.8473,\n 9.9754, 10.1024, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.1033, 11.2187, 11.3333, 11.4471, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.4675, 11.5788, 11.6894, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.2360, 12.3419, 12.4471, 12.5517, 12.4516,\n 12.5557, 12.6592, 12.5604, 12.6635, 12.7660, 12.6684, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.3810, 13.2882, 13.3854, 13.4822, 13.3905, 13.4868,\n 13.5827, 13.4920, 13.5876, 13.6826, 13.7772, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.7926, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.4355, 14.5257, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.7113])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do smart and successful people control their emotions?\nQuestion 2: How can I control my emotions?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -1.9052, -1.7636, -1.8033, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "191", + "Fraction of T in Greenlist": "96.0%", + "z-score": "23.1", + "p value": "1.33e-118", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.4850, 9.6347, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.4926,\n 10.6296, 10.7650, 10.8989, 11.0313, 11.1622, 11.2918, 11.4201, 11.5470,\n 11.6727, 11.7971, 11.9203, 12.0424, 12.1633, 12.2832, 12.4019, 12.5196,\n 12.6363, 12.7520, 12.8667, 12.9804, 13.0932, 13.2052, 13.3162, 13.4263,\n 13.5357, 13.6441, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.3818, 14.4842, 14.5860, 14.6871, 14.7875, 14.8873, 14.9864, 15.0849,\n 15.1828, 15.2801, 15.3769, 15.4730, 15.5685, 15.6635, 15.7580, 15.8519,\n 15.9452, 16.0381, 16.1304, 16.2222, 16.3135, 16.4044, 16.4947, 16.5846,\n 16.6740, 16.7629, 16.8514, 16.9394, 17.0270, 17.1141, 17.2008, 17.2871,\n 17.3730, 17.4585, 17.5435, 17.6282, 17.7124, 17.7963, 17.8798, 17.9629,\n 18.0457, 18.1280, 18.2100, 18.2917, 18.3730, 18.4539, 18.5345, 18.6148,\n 18.6947, 18.7743, 18.8535, 18.9325, 19.0111, 19.0894, 19.1673, 19.2450,\n 19.3224, 19.3994, 19.4762, 19.5527, 19.6288, 19.7047, 19.7803, 19.8556,\n 19.9307, 20.0054, 20.0799, 20.1541, 20.2281, 20.3017, 20.3752, 20.4483,\n 20.5212, 20.5939, 20.6663, 20.7384, 20.8103, 20.8820, 20.9534, 21.0246,\n 21.0955, 21.1662, 21.2367, 21.3069, 21.3769, 21.4467, 21.5163, 21.5856,\n 21.6548, 21.7237, 21.7924, 21.8608, 21.9291, 21.9972, 22.0650, 22.1327,\n 22.2001, 22.2674, 22.3344, 22.4012, 22.4679, 22.5343, 22.6006, 22.6667,\n 22.7325, 22.7982, 22.8637, 22.9291, 22.9942, 23.0591, 23.1239])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can eating only fruit for dinner lead to weight loss?\nQuestion 2: Does eating dinner earlier in the evening help with weight loss?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.0387, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 8.8529, 8.9763, 9.0987, 9.0057, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.9898, 10.9048, 11.0102, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.5841, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 12.0532, 11.9741, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.2214, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.4510, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I buy used car in India?\nQuestion 2: Which used car should I buy in India?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 1.0079, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.5131, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.3954, 1.3333, 1.2719, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.4857, 1.4289,\n 1.3725, 1.5191, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.6710, 1.6160, 1.7566, 1.7018, 1.8411, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.9013, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.6732, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.8175, 1.9419, 2.0656, 2.0170, 1.9686, 2.0913, 2.0430,\n 2.1648, 2.2860, 2.2377, 2.1896, 2.1418, 2.0943, 2.2141, 2.1667,\n 2.1195, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "61.2%", + "z-score": "7.7", + "p value": "6.67e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Can i use mods in ps2 games like Gta San Andreass and others ?\nQuestion 2: Nintendo 3DS: Can I just copy the downloaded games to another, bigger SD card for use in the same console?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -0.9733, -1.0229, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.2351, 9.3582, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.0756, 10.9898, 10.9048, 11.0102, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.3616, 11.2789, 11.1968, 11.2992, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.3163, 12.4109, 12.5049, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the difference between the House of Representatives and the Senate?\nQuestion 2: If both senators represent the whole state, then which senator do I get to vote for? What determines this?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.9%", + "z-score": "-0.0411", + "p value": "0.516", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.2100, -0.2513, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.9282, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.1317, 6.9631, 7.1435, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.9286, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.1756, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.5188, 12.4286, 12.5289, 12.6287, 12.7279, 12.8267, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.5329, 13.6264, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.7434, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the best tips for outlining/planning a novel?\nQuestion 2: How do I best outline my novel?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.6971, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.5927, -0.6333, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 11.9288, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.2615, 12.3655, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 12.8877, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 14.0660, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.0726, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.4182, 15.3370, 15.4217, 15.5060, 15.5900, 15.6736, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What's the easiest way to make money online from India?\nQuestion 2: What is the best way to make money in india?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "177", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.13", + "p value": "0.983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -1.9604, -2.0156, -1.7942, -1.8500, -1.6330,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 7.9495, 7.8520, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.7003, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.6238, 8.5381, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.7267, 8.6433, 8.7610, 8.6783, 8.7952, 8.9113,\n 9.0267, 8.9448, 9.0595, 9.1735, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.3338, 9.4438, 9.3659,\n 9.2885, 9.3979, 9.3212, 9.2450, 9.1694, 9.2782, 9.3863, 9.3113,\n 9.2368, 9.3443, 9.2704, 9.1970, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.0155, 9.1218, 9.2276, 9.3328, 9.2619, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.5369, 9.6394, 9.7415, 9.8431, 9.9442, 9.8746,\n 9.9752, 9.9060, 10.0061, 9.9374, 10.0371, 9.9687, 9.9008, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.0624, 9.9957, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What will happen if Donald Trump became the president of America?\nQuestion 2: What will happen now that President-elect Donald Trump has won the election?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.2478,\n -2.2871, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.2406, -2.2785, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.3835,\n -2.4195, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.5354, 6.7402, 6.9402,\n 6.7338, 6.9310, 6.7337, 6.5433, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 6.9631, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.0068, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.0554, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.3423, 11.4450, 11.3616, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.6029, 11.7031, 11.8028, 11.9020, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.4109, 12.3342, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.7928, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What's the best way to increase agricultural productivity?\nQuestion 2: I own an innovative advertising & marketing company operating for over 1 year & I am looking for an investor or financier partner to help expand fast?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.1309, -2.1768, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.4623, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.5247, -2.5620, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.6472, -2.6830, -2.7187, -2.5820, -2.6178, -2.4822, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.3580, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.1471, 8.0335, 8.1750, 8.0632, 7.9530, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.3721, 11.4762, 11.3899, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.2992, 11.2178, 11.3196,\n 11.2389, 11.1588, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the value of [math]\\displaystyle\\int_{0}^{\\infty} \\dfrac{x}{x^2+a^2}\\,dx [/math]?\nQuestion 2: What is the value of [math]\\displaystyle\\int_{-1}^1 \\frac{\\sqrt{1-x^2}}{a-x} \\, dx[/math] where [math]a > 1[/math]?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "24.8%", + "z-score": "-0.0516", + "p value": "0.521", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.1879, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "55.1%", + "z-score": "9.77", + "p value": "7.94e-23", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547, 1.5403, 1.9052,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570, 2.1939, 2.4910, 2.7778,\n 2.6186, 2.4659, 2.7406, 2.5924, 2.8577, 2.7136, 2.9704, 3.2205, 3.4641,\n 3.3221, 3.1844, 3.4207, 3.2863, 3.5165, 3.3853, 3.6098, 3.8297, 4.0451,\n 3.9158, 3.7897, 3.6667, 3.8765, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426,\n 4.4374, 4.3205, 4.2060, 4.3970, 4.2844, 4.4721, 4.3614, 4.5461, 4.7281,\n 4.9075, 5.0844, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.4610, 5.3605, 5.5213, 5.4222, 5.5811,\n 5.7382, 5.8936, 6.0474, 5.9491, 5.8522, 5.7566, 5.9084, 6.0587, 6.2075,\n 6.1128, 6.2601, 6.1664, 6.0740, 6.2197, 6.3640, 6.2725, 6.4153, 6.3248,\n 6.4663, 6.6066, 6.7456, 6.6559, 6.5672, 6.7049, 6.6171, 6.7536, 6.6667,\n 6.8019, 6.9361, 7.0692, 7.2012, 7.1149, 7.0296, 6.9451, 7.0759, 7.2058,\n 7.3346, 7.2508, 7.3786, 7.2956, 7.2134, 7.3402, 7.4661, 7.3845, 7.5094,\n 7.4286, 7.5526, 7.6758, 7.7981, 7.7178, 7.6383, 7.7597, 7.6808, 7.8014,\n 7.7232, 7.8429, 7.9619, 8.0801, 8.1976, 8.1198, 8.0427, 7.9663, 8.0829,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.2772, 8.2024, 8.3162, 8.4293, 8.3550,\n 8.4674, 8.3937, 8.5054, 8.6165, 8.7270, 8.6537, 8.5810, 8.6908, 8.6186,\n 8.7278, 8.6560, 8.7646, 8.8726, 8.9800, 9.0869, 9.0155, 8.9447, 8.8744,\n 8.9806, 9.0863, 9.1915, 9.1215, 9.2261, 9.1566, 9.0876, 9.1916, 9.2952,\n 9.2265, 9.3295, 9.2613, 9.3638, 9.4658, 9.5673, 9.4995, 9.4321, 9.5331,\n 9.4661, 9.5666, 9.5000, 9.6000, 9.6996, 9.7987, 9.8974, 9.8311, 9.7653])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is it like to study in Llandudno?\nQuestion 2: Ford Figo 2016 or Grand i10 petrol 1.2 only?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.7823, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.8980, 0.8337, 1.0070, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.8773, 0.8333,\n 0.7896, 0.9119, 1.0336, 0.9897, 0.9461, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "161", + "Fraction of T in Greenlist": "80.9%", + "z-score": "18.2", + "p value": "2.05e-74", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 8.9456, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.4614, 10.5903, 10.7179, 10.5862,\n 10.7131, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 10.9560, 11.0782,\n 10.9546, 11.0762, 11.1967, 11.3163, 11.4349, 11.5525, 11.6693, 11.7851,\n 11.6656, 11.7809, 11.8953, 12.0089, 12.1216, 12.2336, 12.3447, 12.4550,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 13.1015, 13.2067, 13.3113,\n 13.1979, 13.3022, 13.4057, 13.2942, 13.3974, 13.4999, 13.6019, 13.7032,\n 13.8039, 13.9040, 14.0036, 14.1025, 14.2009, 14.2988, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.7685, 14.8629, 14.7580, 14.6542,\n 14.7486, 14.8425, 14.9359, 15.0289, 15.1213, 15.2134, 15.3049, 15.3960,\n 15.4867, 15.3858, 15.4762, 15.5662, 15.6558, 15.7449, 15.8336, 15.9220,\n 15.8232, 15.7252, 15.8135, 15.9014, 15.8046, 15.8923, 15.7965, 15.8840,\n 15.9711, 16.0578, 16.1441, 16.0497, 16.1358, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.4684, 16.5525, 16.6363, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 17.0411, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.6058, 17.6852, 17.7643, 17.8432, 17.7546, 17.8333,\n 17.9117, 17.8241, 17.9023, 17.9803, 18.0580, 18.1355, 18.2126])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why don't people just shoot Captain America below the shield?\nQuestion 2: Why did everyone have issue accepting Sam Wilson as the new Captain America, no one had an issue when Bucky took up the shield?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.9949, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.5620,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.6058, 0.7336, 0.6885, 0.6437, 0.7703, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.9119, 0.8682, 0.9897, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 7.8928,\n 8.0413, 7.9196, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.3642, 11.4714, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.2142, 11.1253, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.5026, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.5221, 12.4460, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.6102, 12.7017, 12.7928, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is PlatePlay available in Barcelona?\nQuestion 2: Wher is Barcelona?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.0359, -0.8847, -0.7346, -0.5855, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.3659, 7.5340, 7.6996, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.7084, 10.5837, 10.7084, 10.8321,\n 10.9546, 10.8327, 10.9546, 10.8347, 10.9559, 11.0761, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.0005, 12.1118, 12.0000, 11.8895, 11.7803, 11.8915, 11.7838,\n 11.8944, 12.0044, 11.8982, 12.0077, 12.1164, 12.0118, 12.1200, 12.2275,\n 12.1244, 12.0223, 12.1295, 12.0286, 11.9288, 11.8299, 11.9370, 12.0433,\n 11.9457, 12.0516, 12.1568, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.3810, 13.4780, 13.5746, 13.6707, 13.7663, 13.6742,\n 13.7694, 13.8642, 13.7730, 13.8675, 13.7772, 13.8713, 13.9650, 13.8756,\n 13.7870, 13.6990, 13.7926, 13.8857, 13.9784, 13.8914, 13.8051, 13.7194,\n 13.6343, 13.7270, 13.6427, 13.7350, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 13.8522, 13.7706, 13.8613, 13.9515, 13.8707, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.4294, 14.5162, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do I increase sperms in my body?\nQuestion 2: Does the quantity of blood in our body increase as we grow?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.0735, -1.1547, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.4619, 0.4021, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.5680, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.8721, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 1.1094, 1.0598, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 1.1513, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.6%", + "z-score": "10.6", + "p value": "1.73e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.6783, 8.7952, 8.9113,\n 8.8294, 8.9448, 9.0595, 8.9783, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.8131, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.7574, 9.8611, 9.7886, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.7222, 10.6538, 10.5859])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: When you get a love spell done do you feel drained and tired?\nQuestion 2: What do Quorans think of that moment when you finally get a notification and when you open it, it's Quora correcting your spelling mistakes?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.3242, -1.4142,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "108", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "63.9%", + "z-score": "9.33", + "p value": "5.13e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.3434, 6.5433, 6.3594, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.7045, 8.8405, 8.7277, 8.6164,\n 8.7515, 8.8853, 8.7758, 8.6678, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.3088, 9.2094, 9.3333])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What do other countries think of Bosnia and Herzegovina?\nQuestion 2: Why are Bosnia and Herzegovina one country?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.0000, -0.0420, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 3.5839, 3.4219, 3.2660,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.6904, 4.5847, 4.4809, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.4059, 6.5504, 6.4566, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.2222, 6.1383, 6.0553, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.5144, 6.6471, 6.7788, 6.6968, 6.8274, 6.9570,\n 6.8757, 6.7952, 6.7155, 6.6365, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.2691, 7.1904, 7.1125, 7.0353, 7.1590, 7.2818, 7.2051, 7.3271,\n 7.2510, 7.1755, 7.1007, 7.0265, 6.9529, 6.8799, 7.0007, 6.9282,\n 6.8563, 6.7850, 6.9048, 7.0238, 7.1421, 7.2596, 7.3765, 7.4927,\n 7.4215, 7.5369, 7.6517, 7.5809, 7.6950, 7.6246, 7.5548, 7.6681,\n 7.7808, 7.8928, 8.0042, 8.1150, 8.0455, 7.9764, 7.9078, 8.0178,\n 8.1273, 8.0591, 8.1679, 8.1001, 8.0328, 7.9659, 7.8995, 7.8335,\n 7.7679, 7.8759, 7.8107, 7.9181, 7.8533, 7.9601, 8.0663, 8.1721,\n 8.2773, 8.3820, 8.4862, 8.4215, 8.5252, 8.6284, 8.5640, 8.6667,\n 8.7689, 8.8706, 8.8065, 8.9077, 8.8439, 8.9446, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is one of the biggest misconceptions people have about you?\nQuestion 2: What do you perceive to be the biggest misconception people have about you? Why do you think this misconception exists?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 8.7986, 8.9355, 9.0711, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.0179, 8.9086, 8.8007, 8.9324, 9.0629, 8.9567,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.5795, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.7175, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.4581, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.5718, 11.5000,\n 11.5948, 11.5235, 11.6179, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Will overstaying my visit visa with 3 months affect my spouse visa application?\nQuestion 2: Will overstaying my UK visit visa with 3 months affect my UK spouse visa application?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "30.2%", + "z-score": "0.946", + "p value": "0.172", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 8.9815,\n 9.1225, 8.9935, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 9.9980, 9.8852, 10.0107, 9.8995,\n 10.0242, 9.9146, 9.8064, 9.9304, 10.0535, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.4065, 11.5157, 11.4184, 11.3222, 11.4310,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.0226, 12.9337, 13.0311, 12.9430, 12.8556, 12.9527, 12.8661, 12.7802,\n 12.8769, 12.9732, 12.8881, 12.8037, 12.8997, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.4859, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.4263, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.8113, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.2640, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can you find all of your Gmail accounts?\nQuestion 2: How do I resolve Gmail account issues?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.1082,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.2608, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.4037, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.5386, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.7591, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.6667,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "66.5%", + "z-score": "13.3", + "p value": "6.14e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.2178, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.6930, 12.6130, 12.7077,\n 12.8019, 12.7226, 12.8165, 12.9099, 12.8313, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: \"What are the best slogans based on the theme \"\"United India\"\" or \"\"\u0930\u093e\u0937\u094d\u091f\u094d\u0930\u0940\u092f \u090f\u0915\u0924\u093e\"\"?\"\nQuestion 2: What are some of the best slogans written on trucks in India?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -1.8821, -1.9211, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 10.7955, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.7211, 10.8353, 10.7362, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 11.1026, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.5645, 11.6709, 11.5779, 11.4857,\n 11.5917, 11.5005, 11.4101, 11.5156, 11.4261, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.5732, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.7434, 13.8350, 13.9262,\n 13.8434, 13.9343, 13.8522, 13.7706, 13.6896, 13.6091, 13.5292, 13.6201,\n 13.5408, 13.4620, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 13.8447, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the best order to read the Ender saga in?\nQuestion 2: What year is Ender's Game (2013 movie) set in?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.3651, 0.3026, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n 0.1629, 0.3244, 0.2692, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.7293, 0.8721, 0.8208, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.9497, 0.9017, 0.8540, 0.9858, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.0328, 0.9870, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.1790, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.6601, 12.5732, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.4758, 13.5683, 13.4859, 13.5781,\n 13.6698, 13.5881, 13.6796, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.5948, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the average BPM for these genres of music?\nQuestion 2: Can we consider musicals a popular music genre?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.1650, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.9585, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.2187, 11.1111, 11.2259, 11.1197, 11.0147, 11.1291,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.2623, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.2966, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.2882, 13.1962, 13.2936, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.0619, 14.1543, 14.2464, 14.3380, 14.2499, 14.3412, 14.2539,\n 14.3449, 14.4355, 14.5257, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.1498, 15.2364, 15.3226,\n 15.2387, 15.1553, 15.2414, 15.3272, 15.2446, 15.1625, 15.0810, 15.1667,\n 15.0858, 15.0054, 15.0909, 15.1761, 15.2609, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which laptop would be better to buy now an i5 6200U with AMD R5 M330 or an i5 5200U with AMD R5 M230?\nQuestion 2: Which laptop would be better the one with i5 5200U and AMD R5 M330 2GB or i5 5200U with Nvidia 820M 2GB?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "177", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "20.9%", + "z-score": "-1.26", + "p value": "0.896", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -0.9631, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.0928, 5.9874, 5.8835, 5.7812, 5.9386,\n 5.8377, 5.9932, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.2532, 7.3901, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.2055, 9.1250, 9.0452,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.2132, 10.1398, 10.0668, 10.1690, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.4281, 10.5278, 10.4563, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 10.9299, 10.8602, 10.9564, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Are Rolls-Royce cars given only to reputed personalities?\nQuestion 2: What is the criteria to buy a Rolls-Royce?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.5298, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.2907, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.7979, 0.9215, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.9461, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.3549, 6.2601, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.9285, 9.0453, 9.1615, 9.2768, 9.1927, 9.1094,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.7908, 9.7109, 9.6317, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.3827, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.1846, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.4525, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Whose Economy is in better shape in 2016: Pakistan or Bangladesh?\nQuestion 2: Does Pakistan hate Bangladesh?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "10.1%", + "z-score": "-4.87", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.8837, -3.9181, -3.9524, -3.9865, -4.0204, -4.0541, -3.8997, -3.9337,\n -3.9675, -4.0011, -4.0345, -4.0678, -4.1009, -4.1338, -4.1666, -4.1992,\n -4.2316, -4.2639, -4.2960, -4.3280, -4.1800, -4.2122, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.3710, -4.4023, -4.4334, -4.4644, -4.4953, -4.5260,\n -4.5566, -4.5871, -4.4448, -4.4754, -4.5060, -4.5364, -4.5666, -4.5968,\n -4.6268, -4.6567, -4.6864, -4.7161, -4.7456, -4.7749, -4.8042, -4.8333,\n -4.6961, -4.7255, -4.7547, -4.7838, -4.8127, -4.8416, -4.8703])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.4834, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.2263, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.4922,\n 12.6035, 12.7140, 12.5916, 12.4708, 12.5812, 12.6909, 12.7998, 12.9080,\n 13.0154, 13.1221, 13.2280, 13.3333, 13.2167, 13.3217, 13.4259, 13.5295,\n 13.6324, 13.7347, 13.8364, 13.9375, 13.8244, 13.7125, 13.8136, 13.9140,\n 14.0139, 14.1131, 14.2118, 14.3099, 14.4075, 14.2988, 14.3961, 14.4928,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 15.0624, 14.9568, 14.8522,\n 14.9459, 15.0391, 15.1318, 15.2240, 15.3158, 15.4072, 15.4980, 15.3960,\n 15.4867, 15.5769, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.0099, 15.9113, 15.9990, 16.0863, 16.1732, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.4207, 16.3250, 16.4104, 16.4954, 16.5801, 16.6644, 16.7484,\n 16.8320, 16.9152, 16.8216, 16.7286, 16.8118, 16.8948, 16.9774, 17.0596,\n 17.1415, 17.2231, 17.3044, 17.2133, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.6058, 17.6852, 17.5963, 17.5081, 17.5875, 17.6667,\n 17.7455, 17.8241, 17.9023, 17.9803, 18.0580, 17.9714, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Steel: Which is the best industrial wire products manufacturing company in India?\nQuestion 2: What is the steel production capacity of top 10 steel manufacturers individually in India?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "36.5%", + "z-score": "2.11", + "p value": "0.0175", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 1.3480, 1.6330,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 2.6681, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 5.8812, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 8.0829, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.4501, 9.5668, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 10.0504, 10.1627, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.6219, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.0314, 12.9540, 13.0460, 12.9691, 12.8928, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why are people evil?\nQuestion 2: Why are some people evil? Are they born that way or does life make them evil?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.7392, -1.7823,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.8033, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 7.8598, 8.0042,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.4270, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.3249, 11.4244, 11.5234, 11.6220, 11.7200, 11.8176, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.1260, 12.0493, 11.9730, 12.0685, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Why didn't Ned Stark bring more men to the Tower of Joy?\nQuestion 2: Why did Ned Stark go to the Tower of Joy with so few men? Why not bring a small guard (say 20 more men) of loyal and discreet northerners?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.4495,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -3.1840, -2.9314, -2.9814, -3.0308, -3.0796, -3.1278, -3.1754,\n -3.2225, -3.2691, -3.0330, -3.0806, -3.1277, -3.1743, -3.2204, -3.2660,\n -3.3111, -3.3558, -3.4000, -3.4438, -3.4871, -3.5301, -3.3128, -3.3566,\n -3.4000, -3.1879, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.0429, -2.8446, -2.8893, -2.9336, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -2.7713, -2.8150, -2.8583, -2.6737, -2.4910,\n -2.3101, -2.3552, -2.1768, -2.2222, -2.2673, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.1254, -1.9837, -2.0226, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.0512, -1.9149,\n -1.9528, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -1.9843, -1.8527, -1.8898, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 9.9542, 10.0647, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 10.8515, 10.9545,\n 10.8749, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.5234, 11.6220, 11.5444, 11.4674, 11.3910, 11.3150,\n 11.4132, 11.3378, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.6949, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.0209, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the meaning of nani desu ka?\nQuestion 2: What is the meaning of life?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 2.1831, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 12.7708, 12.8877, 13.0035, 12.8201, 12.9364, 13.0516, 13.1658, 13.2791,\n 13.3913, 13.5027, 13.6132, 13.7227, 13.5534, 13.6633, 13.7723, 13.8804,\n 13.9878, 13.8258, 13.9333, 14.0400, 14.1460, 14.2511, 14.3555, 14.2009,\n 14.3055, 14.4092, 14.5123, 14.6146, 14.7163, 14.8172, 14.9175, 14.7710,\n 14.8714, 14.7277, 14.8281, 14.6871, 14.7875, 14.8873, 14.7495, 14.6135,\n 14.7139, 14.5803, 14.6805, 14.7802, 14.8792, 14.9775, 14.8478, 14.9461,\n 15.0437, 15.1408, 15.2374, 15.3333, 15.4287, 15.5236, 15.6179, 15.4935,\n 15.3705, 15.4651, 15.5592, 15.4384, 15.3189, 15.4133, 15.2955, 15.1789,\n 15.2735, 15.3676, 15.4612, 15.3469, 15.2337, 15.1217, 15.2158, 15.1052,\n 15.1990, 15.0898, 15.1834, 15.0756, 14.9687, 15.0624, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.3158, 15.4072, 15.4980, 15.3960,\n 15.4867, 15.3858, 15.4762, 15.5662, 15.6558, 15.7449, 15.6457, 15.7346,\n 15.6365, 15.7252, 15.6280, 15.5316, 15.6203, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.9632, 15.8694, 15.9561, 15.8631, 15.9496, 16.0357,\n 16.1214, 16.0296, 16.1151, 16.2003, 16.1095, 16.1945, 16.2791, 16.3633,\n 16.2736, 16.3577, 16.4414, 16.3526, 16.2644, 16.3481, 16.4314, 16.5144,\n 16.4272, 16.5100, 16.4236, 16.5062, 16.4205, 16.5028, 16.5849, 16.5000,\n 16.5819, 16.6634, 16.5793, 16.6607, 16.5772, 16.6584, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How can I introduce the surface area of cuboid by using real life situations?\nQuestion 2: What are the applications of surface area in real life?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.1909,\n -2.2542, -1.9599, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.0428, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.0667, -2.1193, -1.9081, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.0732, -2.1213,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.9545, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.0083,\n -2.0476, -1.9052, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.0512, -2.0889,\n -1.9528, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -1.9473, -1.9843, -2.0212, -2.0578, -1.9267, -1.9635, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.1558, 3.0290, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.4293, 3.3147, 3.5228, 3.4101, 3.2998, 3.5032, 3.7033,\n 3.5942, 3.7905, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.5847, 4.7610, 4.6568, 4.5544, 4.4537, 4.3546,\n 4.5274, 4.4296, 4.3333, 4.5034, 4.4083, 4.5760, 4.4820, 4.3894,\n 4.2981, 4.4630, 4.6262, 4.5356, 4.4462, 4.3580, 4.5186, 4.4313,\n 4.3451, 4.5035, 4.6603, 4.5747, 4.7296, 4.8830, 4.7980, 4.7140,\n 4.6311, 4.7823, 4.9322, 4.8497, 4.7682, 4.9163, 4.8355, 4.9820,\n 5.1273, 5.0469, 4.9675, 5.1111, 5.0323, 4.9543, 4.8772, 4.8008,\n 4.9424, 4.8666, 5.0070, 5.1461, 5.0707, 5.2086, 5.3455, 5.4813,\n 5.6160, 5.5407, 5.6743, 5.5995, 5.7320, 5.6578, 5.7892, 5.7155,\n 5.6424, 5.7726, 5.7001, 5.6282, 5.7572, 5.6858, 5.8138, 5.7429,\n 5.6725, 5.7994, 5.9254, 5.8554, 5.7860, 5.7171, 5.8419, 5.7735,\n 5.7056, 5.8294, 5.9524, 5.8848, 6.0069, 6.1283, 6.0609, 5.9941,\n 5.9279, 6.0481, 6.1677, 6.1017, 6.0362, 6.1548, 6.0897, 6.2075,\n 6.3247, 6.2598, 6.1954, 6.3117, 6.2476, 6.1839, 6.2994, 6.2361,\n 6.1732, 6.2879, 6.4019, 6.3392, 6.4526, 6.5653, 6.6775, 6.7890,\n 6.7264, 6.8373, 6.7751, 6.8853, 6.8233, 6.7618, 6.7006, 6.6398,\n 6.7492, 6.6887, 6.6285, 6.7372, 6.6774, 6.7854, 6.7259, 6.8333,\n 6.9403, 6.8809, 6.8219, 6.7632, 6.7049, 6.6469, 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What cheese skins can you eat?\nQuestion 2: Why don't Americans eat the skin of a banana?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.7285, 1.6348, 1.5430,\n 1.7589, 1.9711, 1.8791, 2.0870, 1.9959, 1.9064, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.8766, 2.0702, 1.9870, 1.9052,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 1.9612, 1.8838, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 0.8889, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 5.9214, 5.6622, 5.8989, 5.6569,\n 5.4271, 5.2085, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.6772, 11.5718, 11.6829, 11.5788, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.1765, 13.0815, 13.1806, 13.0866,\n 12.9935, 12.9011, 13.0000, 13.0984, 13.1962, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 13.9690, 14.0619, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.5226, 14.4355, 14.5257, 14.4394, 14.3537, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.4536, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.7545, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How do you get better grades?\nQuestion 2: How can I dramatically improve my grades?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.5053, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.6%", + "z-score": "12.9", + "p value": "2.78e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.5916, 7.4536, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 9.8987, 10.0188, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.5391, 11.4440, 11.3497, 11.4574, 11.3642, 11.2719, 11.1803, 11.2877,\n 11.1971, 11.1073, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.2178, 12.1335, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.4223, 12.5179, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.2987, 12.2214, 12.1447, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.6102, 12.7017, 12.7928, 12.8836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the most efficient way of studying to score a 700 on the GMAT?\nQuestion 2: How do I score above 700 in GMAT? I have been preparing since two months and lately scored only 460 and I have been studying 6 hours everyday?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "190", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "27.9%", + "z-score": "0.921", + "p value": "0.178", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.4428, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.4721, 0.5991, 0.7255, 0.8513,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "67.7%", + "z-score": "12.7", + "p value": "1.94e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.2999, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.0000, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.3754, 12.2891, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is a good solar panel installation provider near Lemon Cove, California CA?\nQuestion 2: What is a good solar panel installation provider near Angels Camp, California CA?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "23.5%", + "z-score": "-0.313", + "p value": "0.623", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.0825, 3.9614, 3.8431, 4.0446, 3.9284, 3.8146, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.6986, 5.8522, 5.7566, 5.9084,\n 5.8139, 5.9641, 5.8707, 6.0193, 5.9270, 5.8358, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.0044, 6.1470, 6.0596, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.5807, 6.4957, 6.6308, 6.5465,\n 6.4632, 6.3807, 6.5144, 6.4327, 6.5653, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.7155, 6.8439, 6.7648, 6.8922, 6.8138, 6.7361,\n 6.6591, 6.7854, 6.9107, 6.8343, 6.9587, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.6883, 7.8072, 7.7308, 7.8489, 7.7732, 7.8905,\n 7.8153, 7.9318, 7.8571, 7.9729, 7.8988, 8.0139, 8.1282, 8.0546,\n 7.9816, 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.3984,\n 8.3268, 8.2557, 8.3660, 8.4757, 8.5848, 8.6933, 8.8013, 8.9087,\n 8.8379, 8.7676, 8.8744, 8.9806, 9.0863, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.6256, 10.5573, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Who is your favorite Prison Break character?\nQuestion 2: Who is the best looking actor in Prison Break?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "174", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "21.3%", + "z-score": "-1.14", + "p value": "0.872", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.1380])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.4536, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.0335, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 8.9586, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.1429, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.4596, 10.5642, 10.6683, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.4047, 11.3333,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: How many time can one attempt for JEE Main in one year?\nQuestion 2: How many times can I give the JEE Mains?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "131", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.757", + "p value": "0.775", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.4576, -1.5119, -1.3151, -1.3697, -1.1761, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.7566])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.4952, 7.6667,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.0498, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.4087, 9.2828, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.0254, 11.1392, 11.0368, 11.1500, 11.2623, 11.3740, 11.2732, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.6242, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.6287, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.6713, 12.7690, 12.8661, 12.9628,\n 13.0590, 12.9732, 12.8881, 12.9840, 13.0795, 13.1745, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.3002, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.6896, 13.7803, 13.8707, 13.9606,\n 13.8804, 13.9700, 13.8904, 13.9797, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.3607, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: FtM: How much of an effect did Testosterone have on your mental/emotional health when you started HRT?\nQuestion 2: What are examples of mental and emotional health?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.0983, -0.1469, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.4327, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.1813, 7.0812, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.0000, 8.1291, 8.0370, 8.1651, 8.0741,\n 7.9839, 7.8948, 8.0219, 7.9336, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.1735, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 11.0235, 10.9458, 10.8686,\n 10.7920, 10.8931, 10.8170, 10.7415, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.0194, 11.1164, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.4525, 11.5470, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Which is the best way to pass the 74-322 exam?\nQuestion 2: What is the best way to prepare for the 74-674 exam?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 2.0207,\n 2.2200, 2.4163, 2.3276, 2.2404, 2.4327, 2.3462, 2.2611, 2.4495,\n 2.3651, 2.2819, 2.2000, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.1918, 2.3658, 2.2902, 2.4618,\n 2.6316, 2.5560, 2.7235, 2.8893, 3.0533, 2.9775, 3.1394, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.0022, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.6667, 2.8203, 2.7524, 2.9044, 3.0551,\n 3.2044, 3.1363, 3.0688, 3.2163, 3.1492, 3.0827, 3.2285, 3.1623,\n 3.0967, 3.0317, 3.1755, 3.1109, 3.0467, 2.9832, 2.9202, 3.0619,\n 2.9991, 2.9369, 3.0770, 3.0151, 3.1539, 3.0923, 3.2299, 3.3665,\n 3.3049, 3.4403, 3.5748, 3.7084, 3.6466, 3.5853, 3.7176, 3.6566,\n 3.5960, 3.7270, 3.6667, 3.6068, 3.5474, 3.6770, 3.6178, 3.5590,\n 3.5007, 3.4428, 3.5708, 3.5131, 3.4558, 3.5827, 3.5256, 3.6515,\n 3.5946, 3.7196, 3.8438, 3.7870, 3.9104, 4.0330, 4.1549, 4.0980,\n 4.0415, 4.1624, 4.1061, 4.0501, 4.1700, 4.1143, 4.0589, 4.0038,\n 4.1226, 4.0678, 4.0132, 3.9590, 3.9052, 4.0228, 3.9691, 3.9158,\n 4.0325, 3.9793, 4.0953, 4.0423, 4.1576, 4.2723, 4.2193, 4.3333,\n 4.4468, 4.5596, 4.5066, 4.4538, 4.5659, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.4935, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.1746, 13.2668, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.3060, 13.2288, 13.1520, 13.0758, 13.1667,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is operational research?\nQuestion 2: What is operations research?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.5274, 0.4778, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.6145,\n 10.7257, 10.8363, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.5311, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.2034, 12.1184, 12.2178, 12.1335, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.2927, 13.2149, 13.3060, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is your new year resolution, short term and long term goal for 2017?\nQuestion 2: What will be your New Year's resolution for 2017?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.5993, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.3084, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.9074, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.3734, -2.4099, -2.4461, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.4263, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 11.9455, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.7199, 12.6367, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Is WIMPs incorrect? Will wave-particle duality ever be understood to be a particle and its associated wave in the strongly interacting dark matter?\nQuestion 2: Would Einstein and de Broglie have realized a strongly interacting dark matter is the medium for gravitational and wave-particle duality waves?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -1.9052, -1.9868, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.2599, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.6547,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.3205, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.3780, -0.2513, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.1358, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.1684, 8.0139, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 9.9384, 9.8116,\n 9.9433, 10.0737, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.7277, 10.8477, 10.9669, 11.0851, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.6772, 11.7881, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.0699,\n 12.9728, 12.8766, 12.7812, 12.6867, 12.5930, 12.6939, 12.7943, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.3128, 13.4086, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.7054, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.8120, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Will NeoBux (PTC site) last?\nQuestion 2: What is an overall rented referral clicks average on Neobux?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.2719, 1.2111, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.8607, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.8866, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.9119, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.1317, 7.3113, 7.1435, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.1216, 8.2711, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.7376,\n 9.6348, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.3249, 11.4244, 11.5234, 11.6220, 11.7200, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.8014, 11.7261, 11.6514, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 12.0529, 11.9792, 11.9060, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: Where can I get a consistent high quality service for commercial cleaning in Bangalore?\nQuestion 2: What are the lowest fee universities for ms in australia?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "18.5%", + "z-score": "-1.22", + "p value": "0.888", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 6.7543, 6.5354, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.1232, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.2525, 8.4017, 8.5491, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 8.9169, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.2034, 12.1184, 12.0341, 11.9504, 11.8673,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.4109, 12.3342, 12.4283,\n 12.5221, 12.4460, 12.5394, 12.4638, 12.5568, 12.4818, 12.5745, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.6283, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What are the most common ways people get rich in Singapore apart from inherited wealth and winning the lottery?\nQuestion 2: Wealth Creation: What are the most common ways to work only four days a week?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.9%", + "z-score": "0.946", + "p value": "0.172", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.9733, 0.9152, 0.8577, 1.0141, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.6299, 0.7539, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.9461])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.6%", + "z-score": "10.9", + "p value": "4.93e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.3638, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 8.9496, 8.8631, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.9612, 8.8778, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.8978, 9.0117, 8.9319, 9.0452,\n 8.9660, 9.0786, 9.1905, 9.3017, 9.2232, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.3212, 9.4299, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.5577, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.3439, 10.4427, 10.5410, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.8184, 10.9141])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What will happen to the next Star Wars movies after Carrie Fisher's death?\nQuestion 2: What will Carrie Fisher's death mean for the next Star Wars movies?\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.5333, -2.5820,\n -2.3736, -2.4228, -2.2180, -2.2678, -2.3170, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.1500, -3.1889, -3.2276, -3.2660,\n -3.3041, -3.3420, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.2925, -3.3282, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.3799, -3.4140, -3.4478, -3.4816,\n -3.5151, -3.5485, -3.5817, -3.6148, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.6407, -3.6731, -3.7055, -3.7376, -3.6021, -3.6345, -3.6667,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "182", + "Fraction of T in Greenlist": "91.5%", + "z-score": "21.7", + "p value": "3e-104", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.8289, 11.9464, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.7435, 12.8540, 12.9636,\n 13.0725, 13.1806, 13.2879, 13.3945, 13.5004, 13.6056, 13.7100, 13.8138,\n 13.9169, 14.0193, 14.1211, 14.2222, 14.3227, 14.4226, 14.5219, 14.6206,\n 14.7187, 14.8162, 14.9132, 15.0096, 15.1054, 15.2007, 15.2955, 15.3898,\n 15.4835, 15.5767, 15.6694, 15.7617, 15.8534, 15.9447, 16.0355, 16.1258,\n 16.2157, 16.3051, 16.3941, 16.4826, 16.5707, 16.6584, 16.7457, 16.8325,\n 16.9189, 17.0050, 17.0906, 17.1758, 17.2607, 17.3452, 17.4292, 17.5130,\n 17.5963, 17.6793, 17.7619, 17.8442, 17.9261, 18.0077, 18.0889, 18.1698,\n 18.2503, 18.3305, 18.4104, 18.4900, 18.5693, 18.6482, 18.7268, 18.8051,\n 18.8832, 18.9609, 19.0383, 19.1154, 19.1922, 19.2688, 19.3450, 19.4210,\n 19.4967, 19.5721, 19.6472, 19.7221, 19.7967, 19.8710, 19.9451, 20.0189,\n 20.0925, 20.1658, 20.2388, 20.3116, 20.3842, 20.4565, 20.5286, 20.6004,\n 20.6720, 20.7434, 20.8145, 20.8854, 20.9560, 21.0265, 21.0967, 21.1667,\n 21.2364, 21.3060, 21.3753, 21.4444, 21.5133, 21.5820, 21.6505])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Are the following two questions asking the same thing? Answer 'yes' or 'no':\nQuestion 1: What is the weirdest thing about you? Are you proud of it?\nQuestion 2: What is the weirdest thing about you?\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.1651, -1.0276, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.0284, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "62.7%", + "z-score": "10.1", + "p value": "3.57e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.2488, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.9324, 8.8260, 8.9567,\n 9.0863, 8.9815, 8.8780, 8.7757, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.2351, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.2514, 10.1627, 10.0748])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.54, + "accuracy_with_watermark": 0.59, + "f1_without_watermark": 0.23333333333333334, + "f1_with_watermark": 0.22641509433962265 + } + } + }, + "stsb": { + "train": { + "results": [ + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A plane is taking off.\nSentence 2: An air plane is taking off.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.9010, 5.1711,\n 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855, 5.4271, 5.6614, 5.4444,\n 5.2372, 5.4678, 5.2705, 5.0811, 5.3072, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.0576, 5.2697, 5.4772, 5.6805, 5.5234, 5.7229, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425,\n 6.6172, 6.7893, 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467,\n 7.2169, 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460, 7.6339,\n 7.7784, 7.6681, 7.5593, 7.4521, 7.3464, 7.2421, 7.1393, 7.0379, 7.1813,\n 7.0812, 6.9824, 6.8849, 6.7886, 6.6935, 6.5997, 6.5069, 6.4153, 6.3248,\n 6.2354, 6.1470, 6.2883, 6.4283, 6.3408, 6.4795, 6.6171, 6.5303, 6.4444,\n 6.3595, 6.4957, 6.6308, 6.7648, 6.8977, 7.0296, 7.1605, 7.2904, 7.4193,\n 7.5472, 7.6742, 7.5895, 7.7155, 7.6315, 7.5484, 7.6734, 7.7976, 7.9209,\n 8.0434, 7.9608, 7.8791, 8.0006, 7.9196, 7.8393, 7.9600, 7.8803, 8.0002,\n 8.1192, 8.0402, 7.9619, 7.8842, 8.0024, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.6235, 7.5495, 7.4762, 7.4034, 7.3312, 7.4482, 7.3765, 7.3054, 7.2348,\n 7.1647, 7.0952, 7.0262, 6.9577, 6.8897, 6.8222, 6.7552, 6.8707, 6.9856,\n 6.9189, 7.0330, 7.1465, 7.0801, 7.0142, 6.9488, 7.0614, 7.1735, 7.2849,\n 7.3958, 7.5061, 7.6158, 7.7249, 7.8335, 7.9415, 8.0490, 7.9833, 8.0902,\n 8.0249, 7.9601, 8.0663, 8.1721, 8.2773, 8.3820, 8.3173, 8.2531, 8.3572,\n 8.2933, 8.2298, 8.3333, 8.2702, 8.3732, 8.4757, 8.5778, 8.6794, 8.7805,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a large flute.\nSentence 2: A man is playing a flute.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.1263, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -1.1237, -1.1651, -1.2063, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is spreading shreded cheese on a pizza.\nSentence 2: A man is spreading shredded cheese on an uncooked pizza.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.7216, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.9333, 1.8543, 2.0397, 2.2226, 2.1436, 2.0656,\n 1.9887, 2.1678, 2.3448, 2.5198, 2.4423, 2.3658, 2.2902, 2.2156,\n 2.3868, 2.3126, 2.2393, 2.1669, 2.0954, 2.0247, 2.1917, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.2405, 2.1723, 2.3324, 2.4910,\n 2.4228, 2.3552, 2.2884, 2.2222, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.3586, 2.2952, 2.2323, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.3660, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.4121, 2.3529, 2.2943, 2.2361, 2.1783,\n 2.1210, 2.2608, 2.3995, 2.3422, 2.2852, 2.4225, 2.5589, 2.6943,\n 2.6370, 2.7713, 2.7143, 2.6576, 2.7906, 2.9227, 2.8660, 2.8098,\n 2.7539, 2.6984, 2.8288, 2.7735, 2.7186, 2.6640, 2.6099, 2.5560,\n 2.6846, 2.8124, 2.7585, 2.7050, 2.6519, 2.5990, 2.7253, 2.6726,\n 2.7979, 2.9225, 2.8698, 2.8174, 2.7654, 2.7137, 2.8368, 2.7852,\n 2.7340, 2.6830, 2.6323, 2.5820, 2.7036, 2.8245, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.7443, 2.6949, 2.8137, 2.9320, 2.8825, 2.8333,\n 2.7844, 2.7358, 2.8528, 2.8043, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Three men are playing chess.\nSentence 2: Two men are playing chess.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.2756, -0.3205, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.0420, -0.0838, -0.1253, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.1740, 4.3614, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.9747, 5.1490, 5.0410, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 6.7886, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.5620, 9.6758, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 9.8197, 9.7405,\n 9.6619, 9.5840, 9.6921, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.6008, 9.5258, 9.4513, 9.5577, 9.6635, 9.5896, 9.6948, 9.7996,\n 9.9038, 10.0074, 9.9340, 10.0371, 10.1398, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.5001, 10.4281, 10.5278, 10.6271, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.6111, 10.7090, 10.8064, 10.9034, 11.0000,\n 10.9299, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the cello.\nSentence 2: A man seated is playing the cello.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.2381, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.0327, 12.1270, 12.0529, 12.1468, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Some men are fighting.\nSentence 2: Two men are fighting.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.7237, 0.9152, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.4284, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.7237, 1.6732, 1.6230, 1.7529, 1.8821, 1.8317, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.7408,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.7310, 1.6843, 1.6378, 1.7592, 1.8799, 1.8333,\n 1.7870, 1.9068, 2.0259, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.5264, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.6546, 13.7442, 13.6667,\n 13.7559, 13.8447, 13.9332, 14.0214, 13.9446, 14.0324, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is smoking.\nSentence 2: A man is skating.\nSimilarity score:", + "true_label": 0.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.3299, 12.4370, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.6508, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.4674, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.6707, 13.7663, 13.6742,\n 13.5827, 13.6781, 13.5876, 13.6826, 13.7772, 13.8713, 13.7818, 13.8756,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.7113, 14.7998, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.1712, 15.0909, 15.1761, 15.2609, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man is playing the piano.\nSentence 2: The man is playing the guitar.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.1214, 8.0403, 8.1602, 8.0798, 8.1989, 8.1192,\n 8.0402, 7.9619, 7.8842, 7.8072, 7.9253, 7.8489, 7.7732, 7.8905,\n 8.0070, 8.1229, 8.0476, 8.1628, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.1683, 8.2813, 8.2082, 8.3205, 8.2479, 8.1758, 8.2874, 8.3984,\n 8.3268, 8.4371, 8.3660, 8.4757, 8.5848, 8.6933, 8.8013, 8.9087,\n 9.0155, 9.1218, 9.0510, 8.9806, 8.9107, 8.8413, 8.9469, 8.8780,\n 8.8094, 8.7414, 8.6738, 8.7788, 8.8832, 8.9872, 9.0906, 9.1936,\n 9.1262, 9.2287, 9.1617, 9.2637, 9.3651, 9.4661, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.7987, 9.7325, 9.6666, 9.6011, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing on a guitar and singing.\nSentence 2: A woman is playing an acoustic guitar and singing.\nSimilarity score:", + "true_label": 2.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.4225, -2.4623, -2.5019,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.8124, -2.8490, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.9530, 7.8444, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.1152, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.6770, 8.5896, 8.5030, 8.6238, 8.7439, 8.6581, 8.5732,\n 8.4891, 8.4057, 8.3231, 8.2413, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.9660, 8.8874, 8.8095, 8.9221, 9.0340, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.5695, 9.6764,\n 9.6008, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.9220, 10.8505, 10.9480, 11.0450, 11.1415, 11.2376, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is throwing a cat on to the ceiling.\nSentence 2: A person throws a cat on the ceiling.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 7.8463, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.0497, 8.1731, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.5607, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.0786, 9.0000, 8.9221, 9.0340, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.4103, 9.5161, 9.6214,\n 9.5485, 9.4761, 9.4042, 9.5089, 9.4375, 9.3665, 9.2961, 9.4002,\n 9.3302, 9.4338, 9.5369, 9.6394, 9.7415, 9.6719, 9.6028, 9.7043,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.2273, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man hit the other man with a stick.\nSentence 2: The man spanked the other man with a stick.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.1588, 0.0000,\n -0.0525, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.5164, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.4949, 0.4525, 0.4103, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.6%", + "z-score": "9.93", + "p value": "1.55e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321, 2.1004, 1.9052,\n 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 3.3968, 3.6667,\n 3.4915, 3.7524, 4.0056, 3.8367, 4.0825, 4.3217, 4.1586, 4.3916, 4.2339,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.9592, 4.8107, 4.6664, 4.8742, 4.7336,\n 4.9373, 4.8003, 5.0000, 4.8662, 5.0623, 5.2549, 5.1241, 5.3134, 5.4997,\n 5.6830, 5.8635, 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648,\n 6.9282, 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460, 7.6339,\n 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779, 7.7723, 7.6681,\n 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139, 7.9138, 8.0483, 7.9495,\n 8.0829, 7.9853, 8.1176, 8.2488, 8.3789, 8.5079, 8.4116, 8.3164, 8.4444,\n 8.5715, 8.4774, 8.6035, 8.7287, 8.8529, 8.7600, 8.6679, 8.5769, 8.4868,\n 8.3976, 8.3093, 8.2219, 8.1354, 8.0497, 8.1731, 8.2956, 8.4173, 8.3324,\n 8.4532, 8.5732, 8.6924, 8.8108, 8.9285, 9.0453, 8.9612, 8.8778, 8.7952,\n 8.9113, 8.8294, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.7681, 8.8800, 8.8039, 8.9151,\n 8.8396, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 8.9461, 9.0548, 9.1629,\n 9.2704, 9.1970, 9.3040, 9.2311, 9.1587, 9.2651, 9.1932, 9.2990, 9.4042,\n 9.3328, 9.4375, 9.5416, 9.6452, 9.5743, 9.6774, 9.6069, 9.5369, 9.4673,\n 9.3982, 9.5007, 9.6028, 9.5341, 9.6356, 9.7367, 9.6684, 9.6005, 9.5331,\n 9.6336, 9.5666, 9.6667, 9.7663, 9.6996, 9.7987, 9.8974, 9.8311, 9.9294])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman picks up and holds a baby kangaroo.\nSentence 2: A woman picks up and holds a baby kangaroo in her arms.\nSimilarity score:", + "true_label": 4.599999904632568, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.1160, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.3026, 4.4809, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.2615, 5.4222,\n 5.5811, 5.4832, 5.3867, 5.2915, 5.1977, 5.3541, 5.2614, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.5377, 5.6875, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.1470, 6.2883, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.7536, 6.6667, 6.8019, 6.7159, 6.6308, 6.5465,\n 6.4632, 6.5970, 6.7298, 6.8615, 6.9923, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 7.9608,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.1357, 9.2450, 9.3537, 9.2782, 9.2032, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.4432,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.3566, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 10.9564, 10.8872, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a flute.\nSentence 2: A man is playing a bamboo flute.\nSimilarity score:", + "true_label": 3.867000102996826, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.2725, 6.1820, 6.0927, 6.2354, 6.1470, 6.0596, 6.2008, 6.1143,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.9923, 7.1220, 7.2508, 7.3786,\n 7.2956, 7.2134, 7.1319, 7.2587, 7.1779, 7.0980, 7.0187, 7.1443,\n 7.0658, 6.9879, 7.1125, 7.0353, 7.1590, 7.2818, 7.2051, 7.1291,\n 7.0537, 7.1755, 7.2966, 7.4168, 7.5364, 7.4613, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.0139, 7.9403, 8.0546,\n 7.9816, 7.9091, 7.8372, 7.9507, 8.0636, 7.9921, 7.9211, 7.8507,\n 7.7808, 7.8928, 8.0042, 7.9347, 8.0455, 8.1556, 8.2652, 8.3742,\n 8.4826, 8.4133, 8.3446, 8.4523, 8.5595, 8.6662, 8.7724, 8.8780,\n 8.8094, 8.7414, 8.8464, 8.7788, 8.7116, 8.8160, 8.7492, 8.6828,\n 8.7867, 8.8900, 8.9929, 9.0952, 9.0292, 9.1310, 9.0653, 9.0000,\n 8.9351, 9.0364, 8.9718, 9.0726, 9.1730, 9.1088, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is folding a piece of paper.\nSentence 2: Someone is folding a piece of paper.\nSimilarity score:", + "true_label": 4.666999816894531, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.9547, 9.8632, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.1627, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.2602, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.1815, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is running on the road.\nSentence 2: A panda dog is running on the road.\nSimilarity score:", + "true_label": 1.6670000553131104, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.2562, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.0990, 9.2418, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.0673, 9.9384, 10.0698,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.4608, 10.3397,\n 10.2202, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.7551, 12.8599,\n 12.7542, 12.8586, 12.9624, 12.8582, 12.9616, 12.8586, 12.9616, 12.8598,\n 12.9624, 13.0643, 13.1657, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.1442, 14.0479, 14.1429, 14.0475, 14.1422, 14.2364, 14.3302, 14.4234,\n 14.5162, 14.4225, 14.3295, 14.2373, 14.1458, 14.2388, 14.3313, 14.4234,\n 14.5150, 14.6062, 14.6970, 14.7874, 14.8773, 14.7877, 14.8773, 14.9666,\n 15.0555, 15.1440, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.6653, 15.7509, 15.8362, 15.9211, 16.0057, 16.0900, 16.1739,\n 16.2574, 16.1713, 16.2547, 16.1693, 16.2525, 16.3353, 16.2507, 16.3333,\n 16.2494, 16.3318, 16.2486, 16.3308, 16.4127, 16.4943, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A dog is trying to get bacon off his back.\nSentence 2: A dog is trying to eat the bacon on its back.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 2.0647, 1.9711, 2.1798, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.1320, 2.3276, 2.5205, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.4667, 2.6491, 2.5660, 2.4841, 2.6632, 2.8402,\n 2.7585, 2.9329, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.3126, 2.2393, 2.4077, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.5207, 2.6828, 2.8433, 2.7713, 2.7001, 2.6296, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.5991, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.6381, 2.5731, 2.7222, 2.8701, 2.8051, 2.7406,\n 2.6768, 2.6135, 2.7591, 2.6961, 2.8402, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.8753, 2.8141, 2.9537, 3.0923, 3.0311, 2.9704,\n 3.1076, 3.0471, 3.1831, 3.3181, 3.2577, 3.1977, 3.1382, 3.0792,\n 3.2124, 3.3447, 3.4762, 3.6068, 3.7366, 3.6770, 3.6178, 3.5590,\n 3.5007, 3.6289, 3.5708, 3.5131, 3.6401, 3.5827, 3.5256, 3.4689,\n 3.5946, 3.5382, 3.6629, 3.7870, 3.7306, 3.6745, 3.6188, 3.5635,\n 3.6862, 3.6310, 3.7528, 3.8740, 3.8189, 3.7641, 3.7097, 3.6556,\n 3.7755, 3.7216, 3.8406, 3.9590, 3.9052, 3.8516, 3.9691, 3.9158,\n 3.8627, 3.9793, 3.9265, 3.8739, 3.8216, 3.9372, 3.8851, 4.0000,\n 4.1143, 4.0622, 4.0105, 3.9590, 3.9078, 4.0210, 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The polar bear is sliding on the snow.\nSentence 2: A polar bear is sliding across the snow.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321, 2.1004, 1.9052,\n 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 3.3968, 3.6667,\n 3.4915, 3.7524, 4.0056, 4.2515, 4.0825, 4.3217, 4.5547, 4.7819, 5.0037,\n 4.8407, 5.0576, 5.2697, 5.4772, 5.6805, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.6183, 5.8108, 5.6667, 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140,\n 5.6830, 5.5549, 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828,\n 6.3509, 6.2302, 6.3960, 6.2776, 6.4413, 6.3249, 6.2106, 6.0982, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.4610, 5.6220, 5.7812, 5.9386, 6.0943,\n 5.9932, 5.8936, 6.0474, 5.9491, 5.8522, 5.7566, 5.9084, 5.8139, 5.9641,\n 6.1128, 6.2601, 6.1664, 6.3122, 6.4566, 6.5997, 6.5069, 6.6486, 6.5569,\n 6.4663, 6.3768, 6.5169, 6.4283, 6.5672, 6.4795, 6.3928, 6.3070, 6.4444,\n 6.3595, 6.2755, 6.4116, 6.5465, 6.4632, 6.5970, 6.7298, 6.6471, 6.7788,\n 6.9094, 7.0391, 7.1678, 7.0857, 7.0043, 6.9237, 7.0513, 6.9714, 7.0980,\n 7.2236, 7.1443, 7.0658, 7.1904, 7.1125, 7.2363, 7.1590, 7.0823, 7.0063,\n 7.1291, 7.2510, 7.3721, 7.4924, 7.6120, 7.7308, 7.8489, 7.7732, 7.8905,\n 8.0070, 7.9318, 7.8571, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673, 7.9816,\n 7.9091, 7.8372, 7.9507, 8.0636, 8.1758, 8.2874, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.6560, 8.7646, 8.6933, 8.8013, 8.9087, 8.8379, 8.7676, 8.8744,\n 8.9806, 9.0863, 9.0164, 9.1215, 9.2261, 9.3302, 9.2607, 9.3642, 9.4673,\n 9.3982, 9.3295, 9.2613, 9.1936, 9.2960, 9.3980, 9.4995, 9.6005, 9.7011,\n 9.8012, 9.7337, 9.6667, 9.7663, 9.6996, 9.6334, 9.7325, 9.8311, 9.9294,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 3.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is writing.\nSentence 2: A woman is swimming.\nSimilarity score:", + "true_label": 0.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.0359, -1.0812, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -0.9048, -0.9492, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.6783, -0.5410, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.6058, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.5997,\n 6.5069, 6.6486, 6.7890, 6.9282, 6.8364, 6.9743, 7.1110, 7.0201,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.9336, 8.0598, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.3813, 8.2956, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.6397, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.8170, 10.7415, 10.6665, 10.5921, 10.5181, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.2624, 11.1919, 11.1218, 11.2171, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A cat is rubbing against baby's face.\nSentence 2: A cat is rubbing against a baby.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.6715, 4.5760, 4.7419, 4.6476,\n 4.8113, 4.7181, 4.6262, 4.7875, 4.6967, 4.8561, 5.0138, 4.9237,\n 5.0795, 5.2338, 5.3865, 5.5377, 5.6875, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.3768, 6.2883, 6.4283, 6.5672,\n 6.7049, 6.8414, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.7831, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 8.9912,\n 8.9151, 9.0257, 9.1357, 9.0601, 8.9851, 8.9107, 9.0200, 8.9461,\n 9.0548, 8.9815, 9.0895, 9.1970, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.0155, 9.1218, 9.0510, 8.9806, 9.0863, 9.0164, 8.9469, 9.0520,\n 9.1566, 9.2607, 9.1916, 9.2952, 9.3982, 9.3295, 9.4320, 9.5341,\n 9.6356, 9.7367, 9.6684, 9.6005, 9.7011, 9.6336, 9.7337, 9.6667,\n 9.6000, 9.5338, 9.4680, 9.5675, 9.6666, 9.6011, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man is riding a horse.\nSentence 2: A man is riding on a horse.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.2310, -1.1007, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man pours oil into a pot.\nSentence 2: A man pours wine in a pot.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.2319, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.3721, 9.5021, 9.3901, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.3411, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.9917, 11.1026, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.5264, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A girl is playing a guitar.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.0479, -0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.7671, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A panda is sliding down a slide.\nSentence 2: A panda slides down a slide.\nSimilarity score:", + "true_label": 4.599999904632568, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.5556, -1.6037, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.6186, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.5744, 2.4398, 2.6943,\n 2.9424, 2.8098, 3.0509, 2.9212, 3.1558, 3.0290, 2.9055, 2.7852, 2.6681,\n 2.8943, 2.7791, 3.0000, 2.8868, 3.1027, 2.9913, 3.2026, 3.4101, 3.2998,\n 3.5032, 3.3947, 3.5942, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371,\n 4.6188, 4.7980, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 6.0928, 5.9874, 5.8835, 5.7812, 5.9386, 6.0943,\n 6.2483, 6.4006, 6.5514, 6.7006, 6.5993, 6.7469, 6.6469, 6.5483, 6.4510,\n 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.5997, 6.7414, 6.8819, 6.7890,\n 6.6973, 6.6066, 6.7456, 6.8834, 7.0201, 7.1556, 7.2900, 7.2001, 7.3333,\n 7.2443, 7.1563, 7.0692, 6.9830, 7.1149, 7.0296, 6.9451, 7.0759, 7.2058,\n 7.3346, 7.4625, 7.3786, 7.2956, 7.2134, 7.1319, 7.2587, 7.3845, 7.3037,\n 7.4286, 7.3485, 7.4724, 7.5955, 7.7178, 7.8393, 7.7597, 7.6808, 7.6026,\n 7.5251, 7.6456, 7.7653, 7.6883, 7.8072, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.0476, 7.9729, 7.8988, 8.0139, 8.1282, 8.0546, 8.1683,\n 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.3984, 8.3268, 8.2557,\n 8.3660, 8.4757, 8.4050, 8.5141, 8.4439, 8.5524, 8.6603, 8.7676, 8.8744,\n 8.8045, 8.7351, 8.6662, 8.5978, 8.7039, 8.8094, 8.7414, 8.8464, 8.7788,\n 8.8832, 8.9872, 9.0906, 9.1936, 9.1262, 9.0593, 8.9929, 8.9268, 9.0292,\n 9.1310, 9.0653, 9.1667, 9.1013, 9.2022, 9.3026, 9.4026, 9.5021, 9.4370,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is eating something.\nSentence 2: A woman is eating meat.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.7896, 0.7461, 0.8682, 0.9897, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman peels a potato.\nSentence 2: A woman is peeling a potato.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.2011, 2.4371, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823, 2.8868,\n 3.0793, 2.9848, 3.1741, 3.3607, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.1052, 3.2757, 3.4442, 3.6109, 3.5282, 3.4466,\n 3.3659, 3.2863, 3.2077, 3.1300, 3.2928, 3.2157, 3.3764, 3.5355,\n 3.4586, 3.3826, 3.3075, 3.2332, 3.3895, 3.5443, 3.6977, 3.6233,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.4130, 3.3428, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.6452, 3.5762, 3.7205, 3.8636, 4.0056,\n 3.9365, 3.8680, 3.8002, 3.7330, 3.6664, 3.6004, 3.7399, 3.6742,\n 3.8125, 3.9497, 3.8841, 3.8191, 3.9549, 3.8903, 4.0249, 4.1586,\n 4.2914, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 3.9107, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.1763, 4.1150, 4.0541, 3.9936, 4.1210,\n 4.2475, 4.3733, 4.3128, 4.2527, 4.1931, 4.1338, 4.0750, 4.0166,\n 4.1406, 4.0825, 4.2056, 4.3280, 4.2699, 4.2122, 4.1549, 4.0980,\n 4.2191, 4.3395, 4.4593, 4.4023, 4.3456, 4.2893, 4.2334, 4.1779,\n 4.1226, 4.2409, 4.1859, 4.3033, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.4331, 4.5476, 4.4936, 4.4399, 4.3864, 4.3333,\n 4.2805, 4.2280, 4.3412, 4.2889, 4.4014, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The boy fell off his bike.\nSentence 2: A boy falls off his bike.\nSimilarity score:", + "true_label": 4.800000190734863, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 2.1602,\n 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.6914, 2.8868,\n 3.0793, 2.9848, 2.8919, 2.8006, 2.9887, 2.8983, 3.0833, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.3556, 3.2686, 3.1829, 3.3566,\n 3.2717, 3.4429, 3.6122, 3.5277, 3.4442, 3.3619, 3.2806, 3.4466,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.0022, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.0548, 1.9920, 2.1442, 2.0817, 2.2323, 2.3817, 2.3190,\n 2.2569, 2.1954, 2.1344, 2.2813, 2.2205, 2.3660, 2.5103, 2.4495,\n 2.3891, 2.5318, 2.4717, 2.6131, 2.7534, 2.8928, 3.0311, 2.9704,\n 2.9103, 2.8505, 2.9872, 2.9277, 3.0632, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.1690, 3.2998, 3.2419, 3.3717,\n 3.5007, 3.4428, 3.3853, 3.3282, 3.2715, 3.3989, 3.3424, 3.4689,\n 3.5946, 3.5382, 3.4821, 3.4263, 3.3710, 3.3160, 3.2614, 3.2071,\n 3.1532, 3.0997, 3.0464, 2.9935, 2.9410, 2.8887, 2.8368, 2.7852,\n 2.7340, 2.6830, 2.6323, 2.5820, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.4553, 2.4065, 2.5265, 2.6458, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.5700, 2.5220, 2.6393, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The woman is playing the flute.\nSentence 2: A woman is playing a flute.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.1650, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.0948, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.9335, 2.1320, 2.0455, 1.9604, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.9333, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.9127, 1.8378, 2.0158, 2.1918, 2.1167, 2.0426, 1.9695,\n 1.8972, 2.0692, 1.9973, 2.1669, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.2862, 2.2162, 2.1470, 2.3094, 2.4703, 2.4010, 2.5600, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.5991, 2.5322, 2.4660, 2.6186,\n 2.7699, 2.7037, 2.8534, 3.0019, 2.9357, 2.8701, 2.8051, 2.7406,\n 2.6768, 2.8226, 2.7591, 2.9035, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.9369, 2.8753, 3.0151, 3.1539, 3.0923, 3.0311, 3.1685,\n 3.1076, 3.2437, 3.1831, 3.3181, 3.4521, 3.3915, 3.5245, 3.6566,\n 3.5960, 3.5359, 3.4762, 3.4170, 3.3582, 3.4884, 3.4298, 3.5590,\n 3.6874, 3.6289, 3.5708, 3.6980, 3.6401, 3.7664, 3.7087, 3.8341,\n 3.9586, 3.9010, 4.0247, 4.1477, 4.0901, 4.0330, 3.9762, 3.9198,\n 3.8638, 3.9853, 3.9294, 4.0501, 4.1700, 4.1143, 4.0589, 4.0038,\n 3.9491, 4.0678, 4.0132, 4.1312, 4.2485, 4.1940, 4.3106, 4.4265,\n 4.3721, 4.3180, 4.2642, 4.2108, 4.1576, 4.2723, 4.2193, 4.3333,\n 4.4468, 4.3938, 4.3412, 4.2889, 4.4014, 4.3492, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A rabbit is running from an eagle.\nSentence 2: A hare is running from a eagle.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 1.0206,\n 1.1692, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.4662, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 1.1794, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 1.0788, 1.0328, 1.1587, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.2752, 1.3950, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.2151, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.0913, 11.0194, 11.1164, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The woman is frying a breaded pork chop.\nSentence 2: A woman is cooking a breaded pork chop.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.1588, -0.2108,\n -0.0525, -0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.4495, 2.6679, 2.8823, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.6765, 2.5775, 2.7811, 2.9814, 2.8830, 2.7863, 2.6914, 2.8868,\n 3.0793, 3.2691, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 3.0984,\n 3.2717, 3.1879, 3.1052, 3.2757, 3.1937, 3.3619, 3.5282, 3.4466,\n 3.3659, 3.2863, 3.4498, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.1493, 3.3075, 3.2332, 3.3895, 3.5443, 3.4702, 3.3968,\n 3.3243, 3.2525, 3.4047, 3.5556, 3.4839, 3.4130, 3.3428, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.4308, 3.3627, 3.2953, 3.2285, 3.3731,\n 3.3066, 3.4499, 3.5920, 3.5256, 3.4599, 3.3947, 3.3301, 3.4701,\n 3.6091, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 3.8262, 3.7626,\n 3.6995, 3.8335, 3.7707, 3.9036, 4.0356, 3.9729, 3.9107, 3.8490,\n 3.9795, 4.1092, 4.0476, 3.9865, 3.9258, 3.8655, 3.8057, 3.7463,\n 3.8741, 3.8150, 3.9418, 4.0678, 4.0087, 3.9501, 4.0750, 4.0166,\n 4.1406, 4.2639, 4.2056, 4.1477, 4.0901, 4.2122, 4.3336, 4.2762,\n 4.2191, 4.1624, 4.1061, 4.0501, 3.9945, 4.1143, 4.2334, 4.1779,\n 4.1226, 4.0678, 4.1859, 4.3033, 4.2485, 4.1940, 4.1399, 4.0860,\n 4.0325, 3.9793, 4.0953, 4.0423, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.1143, 4.0622, 4.1758, 4.2889, 4.4014, 4.3492, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A girl is flying a kite.\nSentence 2: A girl running is flying a kite.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.5187, 1.4662, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.5926, 1.7237, 1.6732, 1.6230, 1.7529, 1.7028, 1.6530, 1.7817,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.7693, 1.8935, 1.8453, 1.7974, 1.9206, 1.8728,\n 1.8252, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 2.0000,\n 1.9533, 1.9068, 2.0259, 1.9795, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is riding a mechanical bull.\nSentence 2: A man rode a mechanical bull.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.2982, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.9245, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 1.9795, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.3851, 2.2916, 2.4930, 2.6914, 2.5981,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.4327, 2.6222, 2.8093, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.9140, 2.8292, 2.7456, 2.6632, 2.8402,\n 3.0151, 2.9329, 3.1052, 3.0237, 2.9433, 2.8638, 2.7854, 2.9542,\n 2.8764, 3.0429, 3.2077, 3.1300, 3.0533, 2.9775, 2.9025, 2.8284,\n 2.7552, 2.9161, 3.0754, 3.0022, 3.1597, 3.0870, 3.0151, 2.9439,\n 2.8735, 3.0282, 2.9582, 3.1111, 3.2627, 3.1928, 3.1236, 3.0551,\n 2.9872, 2.9200, 2.8534, 3.0019, 3.1492, 3.0827, 3.2285, 3.1623,\n 3.0967, 3.2408, 3.1755, 3.1109, 3.0467, 3.1889, 3.3301, 3.2660,\n 3.4058, 3.3420, 3.2788, 3.4171, 3.3542, 3.2918, 3.2299, 3.3665,\n 3.5022, 3.4403, 3.5748, 3.5132, 3.4521, 3.3915, 3.3314, 3.2717,\n 3.2124, 3.3447, 3.4762, 3.4170, 3.5474, 3.4884, 3.4298, 3.3717,\n 3.3140, 3.2567, 3.1998, 3.3282, 3.4558, 3.3989, 3.5256, 3.4689,\n 3.4126, 3.3567, 3.3012, 3.2460, 3.1912, 3.3160, 3.4401, 3.3853,\n 3.5085, 3.4539, 3.3996, 3.3457, 3.2921, 3.2389, 3.1860, 3.3075,\n 3.4283, 3.3754, 3.4954, 3.4427, 3.3902, 3.5093, 3.4570, 3.4050,\n 3.3534, 3.4713, 3.5887, 3.5370, 3.6537, 3.6021, 3.5509, 3.5000,\n 3.6156, 3.5648, 3.5143, 3.6291, 3.7432, 3.6927, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man is playing the guitar.\nSentence 2: A man is playing a guitar.\nSimilarity score:", + "true_label": 4.908999919891357, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.2904, 7.2058, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.0658, 6.9879, 6.9107, 6.8343, 6.9587, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.7823, 6.9048, 7.0265, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.5933, 7.7096, 7.8253, 7.7524, 7.8673,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.2479, 8.3595, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.5469, 8.4757, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.6603, 8.7676, 8.8744, 8.9806, 9.0863, 9.0164, 8.9469, 9.0520,\n 8.9830, 8.9145, 9.0190, 9.1230, 9.0549, 8.9872, 9.0906, 9.1936,\n 9.2960, 9.3980, 9.3306, 9.4321, 9.3651, 9.2986, 9.2324, 9.3333,\n 9.2676, 9.3680, 9.3026, 9.2376, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is dancing and singing with other women.\nSentence 2: A woman is dancing and singing in the rain.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.2982, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.0453, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.9939, 11.0913, 11.0194, 10.9480, 11.0450, 11.1415, 11.0705, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is slicing a bun.\nSentence 2: A man is slicing an onion.\nSimilarity score:", + "true_label": 2.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.7%", + "z-score": "14.2", + "p value": "4.81e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.5491, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 9.7778, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.1948, 10.0984, 10.0029, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.4430, 12.5401, 12.6367, 12.5542, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.1966])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 3.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is pouring oil into a pan.\nSentence 2: A man is pouring oil into a skillet.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.0586, -0.1166, 0.0580, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.3004, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.2525, 8.4017, 8.5491, 8.6948, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.4184, 11.5271, 11.6351,\n 11.5391, 11.6465, 11.5515, 11.6584, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.3277, 12.4286, 12.5289, 12.4395, 12.3508, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.3883, 12.4870, 12.5852, 12.4998, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 12.8285,\n 12.9238, 13.0185, 12.9363, 13.0307, 12.9491, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.5526, 13.4744, 13.5647, 13.6546, 13.5771, 13.5000,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A lion is playing with people.\nSentence 2: A lion is playing with two men.\nSimilarity score:", + "true_label": 3.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.1562, 0.0000, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, 0.0447, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 9.8975, 10.0074,\n 9.9249, 10.0342, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.7959, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.2403, 12.3333,\n 12.2598, 12.3525, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A dog rides a skateboard.\nSentence 2: A dog is riding a skateboard.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -1.8371,\n -1.8808, -1.7217, -1.7655, -1.8091, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.0889,\n -2.1264, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.0059, -2.0430,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -1.8511, -1.8874, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "55.6%", + "z-score": "9.9", + "p value": "2.14e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.8240, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.9711, 2.1798, 2.0870, 1.9959, 2.1997, 2.4004, 2.3094,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.4327, 2.6222, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 3.1789, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.5283, 3.4429, 3.6122, 3.7796, 3.6947, 3.8600, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.4182, 4.5747, 4.7296, 4.8830, 5.0350, 5.1855,\n 5.3345, 5.2489, 5.3964, 5.3116, 5.4576, 5.3736, 5.5181, 5.4349,\n 5.5780, 5.7199, 5.6373, 5.7778, 5.6959, 5.8351, 5.9732, 5.8919,\n 5.8114, 5.9481, 6.0837, 6.0038, 6.1382, 6.0590, 6.1923, 6.1137,\n 6.0359, 5.9589, 6.0908, 6.0143, 5.9386, 6.0693, 5.9941, 6.1237,\n 6.2524, 6.3803, 6.5072, 6.6332, 6.7585, 6.8828, 6.8076, 6.7330,\n 6.8564, 6.9789, 7.1007, 7.2217, 7.1474, 7.2675, 7.1938, 7.1207,\n 7.2399, 7.3584, 7.2857, 7.4034, 7.5204, 7.6368, 7.5644, 7.4927,\n 7.6082, 7.7230, 7.6517, 7.5809, 7.6950, 7.6246, 7.7380, 7.8507,\n 7.9628, 8.0742, 8.1851, 8.2954, 8.4050, 8.5141, 8.4439, 8.5524,\n 8.4826, 8.4133, 8.5212, 8.4523, 8.5595, 8.6662, 8.7724, 8.8780,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.2265, 9.1584, 9.2613, 9.1936,\n 9.1262, 9.2287, 9.3306, 9.2637, 9.3651, 9.4661, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.7987, 9.8974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Someone is carving a statue.\nSentence 2: A man is carving a statue.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.6681, 2.8943, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 3.1918, 3.0861,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.2667, 3.4503, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.3556, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.6979, 3.6122, 3.7796, 3.9452, 3.8600, 3.7758, 3.9389,\n 3.8555, 4.0166, 3.9340, 4.0931, 4.0112, 3.9302, 4.0872, 4.2426,\n 4.1621, 4.0825, 4.2359, 4.1569, 4.3086, 4.2303, 4.3804, 4.3027,\n 4.2258, 4.3740, 4.5210, 4.4444, 4.3687, 4.2938, 4.2196, 4.3644,\n 4.2907, 4.4341, 4.3609, 4.2885, 4.4302, 4.5708, 4.4987, 4.4272,\n 4.3564, 4.2862, 4.4249, 4.3552, 4.4927, 4.4234, 4.3547, 4.4907,\n 4.6258, 4.5573, 4.4895, 4.4222, 4.3554, 4.4888, 4.4224, 4.5547,\n 4.4887, 4.4233, 4.5542, 4.6843, 4.6191, 4.5543, 4.4901, 4.4264,\n 4.5549, 4.4915, 4.6190, 4.5560, 4.4933, 4.6198, 4.7454, 4.6829,\n 4.6209, 4.7455, 4.6838, 4.8074, 4.7460, 4.8687, 4.8076, 4.7469,\n 4.8687, 4.9897, 4.9292, 4.8690, 4.8093, 4.9292, 5.0485, 4.9889,\n 5.1073, 5.0480, 4.9891, 5.1066, 5.2235, 5.1647, 5.1063, 5.2223,\n 5.1642, 5.2795, 5.2215, 5.3361, 5.2784, 5.2211, 5.3349, 5.4480,\n 5.3909, 5.3340, 5.2775, 5.2213, 5.3335, 5.2776, 5.3891, 5.3333,\n 5.2779, 5.3887, 5.4989, 5.4436, 5.3886, 5.3340, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing an onion.\nSentence 2: A man is cutting an onion.\nSimilarity score:", + "true_label": 2.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.3607, 1.3131, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.7480, 10.6683, 10.7719,\n 10.8749, 10.9773, 10.8984, 10.8200, 10.9220, 11.0235, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.2794, 12.3718, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman peels shrimp.\nSentence 2: A woman is peeling shrimp.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.3943, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.2243, 1.3663, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.3284, 1.4570, 1.4087, 1.3607, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is frying fish.\nSentence 2: A woman is cooking fish.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.5642, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.6082, 11.5329, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.1468, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is playing an electric guitar.\nSentence 2: A woman is playing a guitar.\nSimilarity score:", + "true_label": 3.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A baby tiger is playing with a ball.\nSentence 2: A baby is playing with a doll.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 1.2344,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.1348, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058,\n 5.0679, 4.9731, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.2338, 5.3865, 5.2970, 5.2086, 5.3594, 5.5088, 5.6569,\n 5.5690, 5.7155, 5.8606, 6.0044, 6.1470, 6.2883, 6.2008, 6.3408,\n 6.4795, 6.3928, 6.3070, 6.2222, 6.3595, 6.2755, 6.1924, 6.1101,\n 6.2459, 6.3807, 6.5144, 6.4327, 6.3517, 6.2716, 6.4040, 6.5354,\n 6.4558, 6.5861, 6.7155, 6.6365, 6.5583, 6.6865, 6.8138, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.0353, 7.1590, 7.0823, 7.0063, 6.9310,\n 7.0537, 7.1755, 7.2966, 7.2217, 7.3419, 7.2675, 7.1938, 7.1207,\n 7.2399, 7.3584, 7.4762, 7.5933, 7.7096, 7.6368, 7.7524, 7.6800,\n 7.7949, 7.7230, 7.8372, 7.7658, 7.8793, 7.8084, 7.9211, 7.8507,\n 7.7808, 7.8928, 8.0042, 7.9347, 8.0455, 7.9764, 8.0865, 8.1960,\n 8.3050, 8.4133, 8.3446, 8.4523, 8.5595, 8.6662, 8.7724, 8.8780,\n 8.8094, 8.9145, 9.0190, 8.9509, 8.8832, 8.8160, 8.9199, 8.8531,\n 8.7867, 8.7207, 8.8240, 8.9268, 9.0292, 8.9635, 8.8982, 8.8333,\n 8.9351, 9.0364, 8.9718, 9.0726, 9.1730, 9.1088, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is slicing a tomato.\nSentence 2: A person is slicing some meat.\nSimilarity score:", + "true_label": 1.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.9152, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.6584, 0.7878, 0.9165, 0.8704,\n 0.8245, 0.9520, 0.9062, 1.0328, 1.1587, 1.1127, 1.0670, 1.0215,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.1807, 11.1018, 11.0235, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.1933, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.7169, 11.6441, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person cuts an onion.\nSentence 2: A person is cutting an onion.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.5092, -2.5560, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.5852, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.6540, -2.6934, -2.7325, -2.7714, -2.8101, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -3.1038,\n -2.9576, -2.9938, -3.0298, -3.0657, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.0795, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.0030, 13.0956, 13.0171, 12.9391,\n 12.8616, 12.9540, 13.0460, 12.9691, 12.8928, 12.9845, 12.9087, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the piano.\nSentence 2: A woman is playing the violin.\nSimilarity score:", + "true_label": 1.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is playing the flute.\nSentence 2: A man is playing the guitar.\nSimilarity score:", + "true_label": 1.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is cutting up a potato.\nSentence 2: A man is cutting up carrots.\nSimilarity score:", + "true_label": 2.375, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.4669, 0.4233, 0.5489, 0.5053, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.3299, 12.4370, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.6508, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.4674, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.6707, 13.7663, 13.6742,\n 13.5827, 13.6781, 13.5876, 13.6826, 13.7772, 13.8713, 13.7818, 13.8756,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.7113, 14.7998, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.1712, 15.0909, 15.1761, 15.2609, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A kid is playing guitar.\nSentence 2: A boy is playing a guitar.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.3212, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.5485, 9.4761, 9.5808, 9.5089, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A boy is playing guitar.\nSentence 2: A man is playing a guitar.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.3212, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.5485, 9.4761, 9.5808, 9.5089, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing guitar.\nSentence 2: A boy is playing a guitar.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.3212, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.5485, 9.4761, 9.5808, 9.5089, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A little boy is playing a keyboard.\nSentence 2: A boy is playing key board.\nSimilarity score:", + "true_label": 4.400000095367432, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "6.0%", + "z-score": "-6.18", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -2.9785, -3.0237, -3.0685, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.2863, -3.3288, -3.3708, -3.4125, -3.4538, -3.2579, -3.2998,\n -3.3414, -3.3826, -3.4235, -3.4641, -3.5044, -3.5443, -3.5839, -3.6233,\n -3.6623, -3.7011, -3.7396, -3.7778, -3.8157, -3.8534, -3.8908, -3.9279,\n -3.9648, -4.0015, -4.0379, -4.0740, -4.1100, -4.1457, -4.1811, -4.2164,\n -4.2514, -4.2862, -4.3208, -4.3552, -4.3894, -4.4234, -4.4571, -4.4907,\n -4.5241, -4.5573, -4.5903, -4.6232, -4.6558, -4.4888, -4.5218, -4.5547,\n -4.5874, -4.6198, -4.6522, -4.6843, -4.7163, -4.7481, -4.7798, -4.8113,\n -4.8426, -4.8737, -4.7143, -4.7458, -4.7771, -4.8083, -4.8394, -4.8702,\n -4.9010, -4.9316, -4.9620, -4.9923, -5.0225, -5.0525, -5.0823, -5.1121,\n -5.1417, -5.1711, -5.2005, -5.2297, -5.2588, -5.2877, -5.3165, -5.3452,\n -5.3738, -5.4023, -5.4306, -5.4588, -5.4869, -5.5149, -5.5427, -5.5705,\n -5.5981, -5.6256, -5.6531, -5.6804, -5.7076, -5.7347, -5.7617, -5.7885,\n -5.8153, -5.8420, -5.8686, -5.8951, -5.9214, -5.9477, -5.9739, -6.0000,\n -6.0260, -6.0519, -6.0777, -6.1034, -6.1290, -6.1546, -6.1800])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.6566, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.9278, 10.0389, 9.9542, 9.8702, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 9.9524, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.1018, 11.0235, 11.1245, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man is playing an electric guitar.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.1813, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.0711,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.4233, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.0134, 8.9285, 9.0453, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.7908, 9.7109, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.6148, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.0521, 10.9829, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A dog licks a baby.\nSentence 2: A dog is licking a baby.\nSimilarity score:", + "true_label": 4.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.2722, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 2.1602,\n 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.1997, 2.4004, 2.3094,\n 2.5064, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.3651, 2.5504, 2.4667, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.5019, 2.4228, 2.3448, 2.2678, 2.4423, 2.3658, 2.5378, 2.7080,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.2862, 2.4495, 2.6112, 2.5403, 2.7001, 2.8583, 2.7875, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.8889, 2.8203, 2.9726, 3.1236, 3.0551,\n 2.9872, 3.1363, 3.2841, 3.4308, 3.5762, 3.5079, 3.4402, 3.3731,\n 3.3066, 3.4499, 3.5920, 3.5256, 3.4599, 3.6004, 3.7399, 3.8784,\n 4.0158, 4.1522, 4.2877, 4.4222, 4.5557, 4.4888, 4.6212, 4.5547,\n 4.6860, 4.6198, 4.7501, 4.6843, 4.6191, 4.5543, 4.6832, 4.8113,\n 4.9385, 5.0649, 5.1905, 5.3153, 5.4393, 5.5626, 5.6851, 5.8068,\n 5.9279, 5.8621, 5.7967, 5.9168, 6.0362, 6.1548, 6.2728, 6.2075,\n 6.3247, 6.2598, 6.1954, 6.3117, 6.4274, 6.3632, 6.2994, 6.4143,\n 6.5285, 6.6421, 6.7551, 6.6914, 6.6282, 6.5653, 6.5029, 6.6150,\n 6.7264, 6.8373, 6.9477, 7.0574, 7.1667, 7.2753, 7.2127, 7.1506,\n 7.2585, 7.3660, 7.3041, 7.4109, 7.5173, 7.6231, 7.5614, 7.5000,\n 7.6052, 7.5441, 7.4834, 7.5880, 7.6922, 7.7958, 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing an onion.\nSentence 2: A man is cutting and onion.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.6433, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.9623,\n 0.9110, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.9382, 0.8909,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.8513,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.9659, 1.0890, 1.0444, 1.1667,\n 1.1221, 1.0777, 1.0336, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.0379, 6.9378, 7.0812, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.6466, 7.7778, 7.9079, 7.8168, 7.7268, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.2107, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.5607, 8.6783, 8.7952, 8.9113,\n 8.8294, 8.9448, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.5840, 9.6921, 9.6148, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.2132, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.6271, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.8770, 10.8064, 10.9034, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.0521, 11.1475, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the guitar.\nSentence 2: A man is playing the drums.\nSimilarity score:", + "true_label": 1.555999994277954, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing a pepper.\nSentence 2: A woman is cutting a red pepper.\nSimilarity score:", + "true_label": 3.937999963760376, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 3.9620, 3.8297, 3.7009, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.0825, 3.9614, 3.8431, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.4610, 5.6220, 5.7812, 5.6804,\n 5.5811, 5.4832, 5.3867, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 6.0587, 6.2075, 6.1128, 6.0193, 5.9270, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.3248, 6.4663, 6.6066, 6.5169, 6.4283, 6.5672,\n 6.4795, 6.6171, 6.7536, 6.8889, 7.0231, 6.9361, 7.0692, 7.2012,\n 7.1149, 7.0296, 7.1605, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.5910, 7.5094, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 7.8393, 7.9600, 8.0798, 8.0002, 7.9212,\n 7.8429, 7.7653, 7.8842, 8.0024, 8.1198, 8.2365, 8.1594, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.1628, 8.0880, 8.2024, 8.3162, 8.4293,\n 8.5417, 8.4674, 8.5792, 8.6903, 8.6165, 8.5433, 8.4706, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.8364, 8.7646, 8.8726, 8.9800, 8.9087,\n 8.8379, 8.9447, 8.8744, 8.9806, 9.0863, 9.1915, 9.2961, 9.2261,\n 9.3302, 9.4338, 9.3642, 9.2952, 9.2265, 9.3295, 9.4320, 9.5341,\n 9.6356, 9.7367, 9.6684, 9.7690, 9.8691, 9.8012, 9.7337, 9.8333,\n 9.7663, 9.8654, 9.9641, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the drums.\nSentence 2: A man plays the drum.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.1160, 3.3333,\n 3.5466, 3.7559, 3.9614, 4.1633, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.5432, 5.7133, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 5.9386,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.2075, 6.1128, 6.0193, 6.1664, 6.0740, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.0927, 6.0044, 5.9172, 6.0596, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.8605, 5.7778, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.5144, 6.6471, 6.5653, 6.6968, 6.6157, 6.5354,\n 6.6658, 6.5861, 6.5072, 6.6365, 6.7648, 6.6865, 6.8138, 6.7361,\n 6.6591, 6.7854, 6.9107, 7.0353, 7.1590, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.3721, 7.2966, 7.4168, 7.5364, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.7407, 7.6667, 7.7831, 7.7096, 7.8253, 7.7524, 7.6800,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.2479, 8.1758, 8.2874, 8.3984,\n 8.3268, 8.2557, 8.1851, 8.2954, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.9447, 8.8744, 8.9806, 8.9107, 9.0164, 8.9469, 9.0520,\n 9.1566, 9.0876, 9.0190, 8.9509, 8.8832, 8.8160, 8.9199, 8.8531,\n 8.9565, 9.0593, 9.1617, 9.0952, 9.1971, 9.2986, 9.3995, 9.3333,\n 9.2676, 9.3680, 9.3026, 9.4026, 9.3375, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman rides a horse.\nSentence 2: A woman is riding a horse.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.4%", + "z-score": "5.33", + "p value": "4.8e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428, 1.9462, 2.1602,\n 2.0647, 1.9711, 1.8791, 1.7889, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.8972, 1.8257, 1.9973, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.9437, 1.8773, 1.8116,\n 1.9720, 1.9066, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.9640,\n 1.9009, 1.8385, 1.9920, 1.9298, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.8091, 1.9524, 1.8953, 2.0373, 2.1783,\n 2.1210, 2.2608, 2.2037, 2.3422, 2.4797, 2.6163, 2.7520, 2.8868,\n 2.8288, 2.7713, 2.9048, 3.0373, 3.1690, 3.1113, 3.0540, 2.9971,\n 3.1273, 3.2567, 3.1998, 3.1433, 3.2715, 3.2152, 3.3424, 3.4689,\n 3.4126, 3.5382, 3.4821, 3.6067, 3.7306, 3.8538, 3.7975, 3.9198,\n 4.0415, 3.9853, 4.1061, 4.0501, 3.9945, 4.1143, 4.2334, 4.3519,\n 4.4698, 4.5871, 4.7037, 4.6476, 4.7635, 4.7076, 4.8227, 4.7670,\n 4.8815, 4.8260, 4.9397, 4.8845, 4.9975, 5.1100, 5.0548, 5.0000,\n 4.9455, 5.0571, 5.0027, 5.1137, 5.2241, 5.3340])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is eating a banana by a tree.\nSentence 2: A man is eating a banana.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.0795, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.0185, 12.9363, 12.8546, 12.9491, 12.8680, 12.7876, 12.7077,\n 12.6283, 12.5495, 12.4713, 12.3935, 12.3163, 12.4109, 12.3342, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.5745, 12.6667,\n 12.5923, 12.5183, 12.4448, 12.3718, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A cat is playing a key board.\nSentence 2: A man is playing two keyboards.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "12.1%", + "z-score": "-4.22", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -2.9336, -2.9775, -3.0210, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -3.2116, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.3918, -3.4308, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.4879, -3.5256, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.4913, -3.5280, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.6919, -3.7270, -3.7619, -3.6068, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.8829, -3.9166, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -3.9526, -3.9853, -4.0177, -4.0501, -4.0822, -4.1143, -4.1461, -4.1779,\n -4.0359, -4.0678, -4.0996, -4.1312, -4.1627, -4.1940, -4.2252, -4.2563,\n -4.2872, -4.1487, -4.1798, -4.2108, -4.2416, -4.2723, -4.3029, -4.3333,\n -4.3637, -4.3938, -4.2585, -4.2889, -4.1546, -4.1851, -4.2155])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.8849, 6.7886, 6.9305, 6.8354,\n 6.7414, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.1111, 7.2443, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.6328, 7.5472, 7.4625, 7.3786,\n 7.5056, 7.6315, 7.5484, 7.6734, 7.5910, 7.7152, 7.6335, 7.5526,\n 7.4724, 7.3930, 7.3143, 7.4373, 7.5595, 7.4813, 7.6026, 7.7232,\n 7.6456, 7.5687, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.0070, 7.9318, 8.0476, 8.1628, 8.2772, 8.2024, 8.1282, 8.2420,\n 8.1683, 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.5810,\n 8.6908, 8.6186, 8.5469, 8.6560, 8.7646, 8.6933, 8.8013, 8.7305,\n 8.6603, 8.7676, 8.8744, 8.8045, 8.9107, 9.0164, 8.9469, 8.8780,\n 8.8094, 8.9145, 9.0190, 8.9509, 8.8832, 8.9872, 9.0906, 9.1936,\n 9.2960, 9.2287, 9.3306, 9.4321, 9.3651, 9.2986, 9.2324, 9.1667,\n 9.2676, 9.3680, 9.3026, 9.2376, 9.3375, 9.4370, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man chops down a tree with an axe.\nSentence 2: A man cut a tree with an axe.\nSimilarity score:", + "true_label": 4.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.6830, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.0795, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.1129, 13.2068, 13.3002, 13.2182, 13.1367, 13.0558,\n 12.9755, 12.8957, 12.8165, 12.7378, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 12.9691, 12.8928, 12.8169, 12.7416, 12.6667,\n 12.5923, 12.5183, 12.6102, 12.5367, 12.6283, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A kid plays with a toy phone.\nSentence 2: A little boy plays with a toy phone.\nSimilarity score:", + "true_label": 3.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.4536, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.0206,\n -1.0675, -0.9115, -0.9584, -0.8040, -0.8511, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.7143, -0.7593, -0.8041, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.5164, -0.5579, -0.4280, -0.2988, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.0647, 1.9711, 1.8791, 2.0870, 1.9959, 2.1997, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.4327, 2.6222, 2.8093, 2.7217,\n 2.6354, 2.8189, 2.7333, 2.6491, 2.5660, 2.4841, 2.4034, 2.5820,\n 2.5019, 2.4228, 2.5983, 2.7717, 2.9433, 2.8638, 3.0330, 2.9542,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.8138, 2.9775, 3.1394, 3.2998,\n 3.4586, 3.3826, 3.5396, 3.4641, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.3243, 3.4768, 3.6279, 3.7778, 3.9263, 3.8534, 4.0004, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.8596, 3.7897, 3.7205, 3.8636, 4.0056,\n 4.1464, 4.0771, 4.2167, 4.1478, 4.0795, 4.0119, 3.9448, 4.0825,\n 4.0158, 3.9497, 4.0859, 4.2212, 4.3554, 4.2893, 4.4224, 4.3566,\n 4.2914, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 4.1038, 4.2339,\n 4.3631, 4.3004, 4.4286, 4.3661, 4.3042, 4.2426, 4.1816, 4.1210,\n 4.2475, 4.3733, 4.4983, 4.6225, 4.7460, 4.6850, 4.8076, 4.7469,\n 4.6867, 4.6268, 4.5674, 4.5083, 4.4497, 4.3915, 4.5123, 4.6325,\n 4.7520, 4.6938, 4.8125, 4.7544, 4.6968, 4.6395, 4.5826, 4.5260,\n 4.4698, 4.4140, 4.5311, 4.6476, 4.7635, 4.7076, 4.8227, 4.7670,\n 4.7117, 4.6567, 4.6020, 4.7161, 4.6616, 4.6074, 4.7206, 4.8333,\n 4.9455, 4.8913, 5.0027, 4.9487, 5.0595, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is riding a motorcycle.\nSentence 2: A man is riding a horse.\nSimilarity score:", + "true_label": 1.399999976158142, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.3614, 4.5461, 4.7281, 4.6188,\n 4.5115, 4.4061, 4.3026, 4.2008, 4.3788, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 4.7683, 4.6715, 4.5760, 4.4820, 4.6476,\n 4.8113, 4.9731, 4.8797, 5.0395, 5.1977, 5.1051, 5.0138, 5.1698,\n 5.0795, 4.9904, 5.1444, 5.2970, 5.4480, 5.3594, 5.5088, 5.6569,\n 5.5690, 5.4822, 5.6285, 5.5426, 5.4576, 5.6023, 5.7457, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.0000, 5.9171, 5.8351, 5.7540, 5.6737,\n 5.8114, 5.9481, 6.0837, 6.0038, 6.1382, 6.2716, 6.1923, 6.1137,\n 6.0359, 5.9589, 5.8825, 6.0143, 6.1451, 6.2750, 6.1990, 6.3278,\n 6.4558, 6.3803, 6.3054, 6.2312, 6.1577, 6.0848, 6.2113, 6.3369,\n 6.4618, 6.3892, 6.5130, 6.6361, 6.5639, 6.4923, 6.4213, 6.3509,\n 6.2810, 6.4028, 6.5238, 6.6441, 6.5745, 6.6939, 6.8127, 6.7434,\n 6.6747, 6.7925, 6.7242, 6.6564, 6.7734, 6.8897, 7.0054, 6.9378,\n 7.0527, 7.1670, 7.0998, 7.0330, 6.9667, 7.0801, 7.0142, 7.1270,\n 7.2391, 7.3506, 7.2849, 7.3958, 7.5061, 7.4407, 7.3758, 7.4853,\n 7.4208, 7.3566, 7.4655, 7.5738, 7.6816, 7.6177, 7.7249, 7.8316,\n 7.7679, 7.7047, 7.6418, 7.5794, 7.5173, 7.6231, 7.7285, 7.8333,\n 7.7715, 7.8758, 7.9796, 7.9179, 7.8567, 7.7958, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is riding a motorcycle.\nSentence 2: A man is riding a horse.\nSimilarity score:", + "true_label": 1.399999976158142, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.3614, 4.5461, 4.7281, 4.6188,\n 4.5115, 4.4061, 4.3026, 4.2008, 4.3788, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 4.7683, 4.6715, 4.5760, 4.4820, 4.6476,\n 4.8113, 4.9731, 4.8797, 5.0395, 5.1977, 5.1051, 5.0138, 5.1698,\n 5.0795, 4.9904, 5.1444, 5.2970, 5.4480, 5.3594, 5.5088, 5.6569,\n 5.5690, 5.4822, 5.6285, 5.5426, 5.4576, 5.6023, 5.7457, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.0000, 5.9171, 5.8351, 5.7540, 5.6737,\n 5.8114, 5.9481, 6.0837, 6.0038, 6.1382, 6.2716, 6.1923, 6.1137,\n 6.0359, 5.9589, 5.8825, 6.0143, 6.1451, 6.2750, 6.1990, 6.3278,\n 6.4558, 6.3803, 6.3054, 6.2312, 6.1577, 6.0848, 6.2113, 6.3369,\n 6.4618, 6.3892, 6.5130, 6.6361, 6.5639, 6.4923, 6.4213, 6.3509,\n 6.2810, 6.4028, 6.5238, 6.6441, 6.5745, 6.6939, 6.8127, 6.7434,\n 6.6747, 6.7925, 6.7242, 6.6564, 6.7734, 6.8897, 7.0054, 6.9378,\n 7.0527, 7.1670, 7.0998, 7.0330, 6.9667, 7.0801, 7.0142, 7.1270,\n 7.2391, 7.3506, 7.2849, 7.3958, 7.5061, 7.4407, 7.3758, 7.4853,\n 7.4208, 7.3566, 7.4655, 7.5738, 7.6816, 7.6177, 7.7249, 7.8316,\n 7.7679, 7.7047, 7.6418, 7.5794, 7.5173, 7.6231, 7.7285, 7.8333,\n 7.7715, 7.8758, 7.9796, 7.9179, 7.8567, 7.7958, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A squirrel is spinning around in circles.\nSentence 2: A squirrel runs around in circles.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -0.8511, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.7295, -0.7703, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.7222, 11.6242, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.6939, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 14.9755, 14.8912, 14.9786, 14.8950, 14.9821,\n 14.8991, 14.8167, 14.9037, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.2563, 15.1761, 15.2609, 15.3454, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man and a woman are kissing.\nSentence 2: A man and woman kiss.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.3311, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.5069, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.6742, 0.8066, 0.9382, 1.0690,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 1.1717, 1.1251, 1.2514, 1.3771, 1.3303, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "63.1%", + "z-score": "12.4", + "p value": "1.46e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.2410, 9.1287, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.3373, 11.2493, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.2001, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.4209, 11.5217, 11.4411, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.4638, 12.3912])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is getting into a car.\nSentence 2: A man is getting into a car in a garage.\nSimilarity score:", + "true_label": 3.8329999446868896, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "158", + "Fraction of T in Greenlist": "79.4%", + "z-score": "17.7", + "p value": "1.43e-70", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.6533, 12.7597, 12.8653, 12.7569, 12.8622, 12.7551, 12.6491,\n 12.7542, 12.8586, 12.9624, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.3690, 13.4694, 13.5693, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.2481, 14.3434, 14.2443, 14.3393, 14.4338,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.7098, 14.8021, 14.8940, 14.9854,\n 15.0763, 15.1669, 15.2570, 15.3467, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.9632, 16.0497, 16.1358, 16.2216, 16.1283, 16.2139,\n 16.2990, 16.2068, 16.2917, 16.2003, 16.1095, 16.1945, 16.2791, 16.3633,\n 16.2736, 16.3577, 16.4414, 16.5247, 16.6078, 16.6905, 16.7728, 16.8549,\n 16.7668, 16.8487, 16.9302, 17.0115, 17.0924, 17.1730, 17.2533, 17.3333,\n 17.4130, 17.4925, 17.4062, 17.4855, 17.5644, 17.6431, 17.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is dancing.\nSentence 2: A man is talking.\nSimilarity score:", + "true_label": 0.6000000238418579, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "62.1%", + "z-score": "12.1", + "p value": "8.29e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.2094, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 9.9547, 9.8632, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.4341, 10.3544, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.4533, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 12.1347, 12.0630])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the guitar and singing.\nSentence 2: A man is playing the guitar.\nSimilarity score:", + "true_label": 2.9170000553131104, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.3974, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.3004, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.7671, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is cutting mushrooms.\nSentence 2: A person is cutting mushrooms with a knife.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.4020, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.3698, 0.3225, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.4885, 0.4428, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.6810,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.4949, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.7600, 8.8833, 9.0057, 8.9138, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.5030, 8.6238, 8.5381, 8.4532, 8.3691,\n 8.4891, 8.4057, 8.3231, 8.2413, 8.1602, 8.0798, 8.1989, 8.1192,\n 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.2365, 8.1594, 8.2754,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.6535, 8.5792, 8.5054, 8.6165, 8.7270, 8.6537, 8.7636,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.6130, 9.5416, 9.4707, 9.4002,\n 9.3302, 9.4338, 9.5369, 9.4673, 9.5698, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.2050, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.2273, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A tiger cub is making a sound.\nSentence 2: A tiger is walking around.\nSimilarity score:", + "true_label": 2.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.5323, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.7002, 1.9064, 2.1094, 2.0207,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 2.0702, 1.9870, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.3842, 2.3028, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.4228, 2.5983, 2.5198, 2.4423, 2.3658, 2.2902, 2.4618,\n 2.3868, 2.5560, 2.7235, 2.6485, 2.5743, 2.7393, 2.6656, 2.5927,\n 2.5207, 2.6828, 2.6112, 2.7713, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.6667, 2.5991, 2.7524, 2.9044, 2.8368,\n 2.7699, 2.9200, 2.8534, 2.7875, 2.7222, 2.6575, 2.8051, 2.7406,\n 2.8868, 3.0317, 2.9673, 2.9035, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.9991, 2.9369, 3.0770, 3.0151, 3.1539, 3.0923, 3.2299, 3.3665,\n 3.3049, 3.4403, 3.3789, 3.5132, 3.6466, 3.5853, 3.7176, 3.8490,\n 3.9795, 4.1092, 4.2381, 4.3661, 4.4933, 4.4312, 4.5575, 4.6829,\n 4.8076, 4.9316, 5.0548, 5.1772, 5.2989, 5.2362, 5.3571, 5.4772,\n 5.5967, 5.5340, 5.6527, 5.7707, 5.8880, 6.0047, 5.9420, 5.8797,\n 5.9956, 5.9336, 6.0487, 6.1632, 6.2770, 6.2152, 6.1537, 6.0927,\n 6.2057, 6.3180, 6.2572, 6.1968, 6.3084, 6.4194, 6.3592, 6.4695,\n 6.4096, 6.5193, 6.4597, 6.5688, 6.6774, 6.7854, 6.8930, 7.0000,\n 6.9403, 7.0467, 6.9873, 6.9282, 7.0340, 7.1393, 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is slicing onions.\nSentence 2: A person is peeling an onion.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -2.0692, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -2.0548, -2.0997, -2.1442, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.1268, -2.1691, -2.2111, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.1880, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.2188, -2.2578, -2.2966, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.3635, -2.4004, -2.4371,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.0219, 6.8718, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.2222, 10.3411, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.6061, 10.7211, 10.8353, 10.7362, 10.6380, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.1756,\n 12.0891, 12.0032, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.0558,\n 13.1491, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.4499,\n 13.5408, 13.4620, 13.3838, 13.3060, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing the piano.\nSentence 2: A man is playing the trumpet.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is peeling a potato.\nSentence 2: A woman is peeling an apple.\nSimilarity score:", + "true_label": 2.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.8980, 0.8337, 1.0070, 0.9428,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.8607, 0.8154, 0.9415, 0.8963, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 1.0444, 1.1667,\n 1.1221, 1.0777, 1.0336, 0.9897, 1.1106, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.1646, 12.2627, 12.1805, 12.0990, 12.1967, 12.1158,\n 12.0355, 11.9558, 11.8766, 11.7980, 11.7200, 11.6425, 11.5655, 11.4891,\n 11.4132, 11.5109, 11.4356, 11.3608, 11.2864, 11.2126, 11.1392, 11.0663,\n 10.9939, 10.9220, 10.8505, 10.7795, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A pankda is eating bamboo.\nSentence 2: A panda bear is eating some bamboo.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.1437, -2.1997, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -2.0692, -1.8762, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.7392, -1.7823,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.2857, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.4008, -1.4393, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.5461, 4.4371, 4.3301,\n 4.5115, 4.6904, 4.5847, 4.4809, 4.3788, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.1978, 5.3605, 5.5213, 5.6804,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.6633, 15.7481, 15.8325, 15.9165, 16.0002, 16.0836, 16.1667,\n 16.2494, 16.3318, 16.4139, 16.4957, 16.5772, 16.6584, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is peeling an onion.\nSentence 2: A person is peeling an eggplant.\nSimilarity score:", + "true_label": 2.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.3170, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.2629, -2.3102, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.3101, -2.3552, -2.4000, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.5726, -2.6131, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.5373, -2.5769, -2.6163, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.5841, -2.6224,\n -2.6605, -2.5123, -2.5506, -2.5886, -2.6264, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.7454, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.8208, -2.8561, -2.7187, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.7940, -2.8287, -2.8633, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.3238, 2.5538, 2.4422, 2.6667,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.4585, 2.6713, 2.5690, 2.7775,\n 2.9824, 2.8808, 2.7811, 2.6833, 2.8830, 3.0796, 3.2733, 3.1754,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.2667, 3.1743, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.7087, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.4429, 3.3587, 3.5277, 3.6947, 3.6109, 3.7758, 3.6927,\n 3.6107, 3.7732, 3.6919, 3.6116, 3.5322, 3.4538, 3.6133, 3.7712,\n 3.6931, 3.8492, 3.7717, 3.6950, 3.8490, 3.7730, 3.6977, 3.8497,\n 3.7750, 3.9254, 4.0745, 4.0000, 4.1475, 4.0736, 4.0004, 4.1461,\n 4.0734, 4.0015, 4.1455, 4.0740, 4.2167, 4.3583, 4.2870, 4.4272,\n 4.3564, 4.2862, 4.4249, 4.3552, 4.2861, 4.2176, 4.1498, 4.2866,\n 4.4225, 4.3548, 4.4895, 4.4222, 4.3554, 4.4888, 4.4224, 4.3566,\n 4.2914, 4.2267, 4.3583, 4.4891, 4.4246, 4.5543, 4.4901, 4.4264,\n 4.5549, 4.4915, 4.4286, 4.3661, 4.3042, 4.4312, 4.5575, 4.4956,\n 4.6209, 4.5594, 4.4983, 4.6225, 4.5617, 4.5013, 4.4413, 4.3818,\n 4.5047, 4.6268, 4.5674, 4.6887, 4.6295, 4.5707, 4.6911, 4.6325,\n 4.5744, 4.6938, 4.6359, 4.7544, 4.8724, 4.8146, 4.9317, 4.8742,\n 4.8170, 4.9333, 4.8763, 4.8197, 4.7635, 4.8787, 4.9934, 5.1075,\n 5.0513, 5.1647, 5.1086, 5.0529, 5.1655, 5.1100, 5.0548, 5.1667,\n 5.1117, 5.2229, 5.3335, 5.2786, 5.3886, 5.3340, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A monkey pushes another monkey.\nSentence 2: The monkey pushed the other monkey.\nSimilarity score:", + "true_label": 4.800000190734863, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.5991, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.1651, 8.0741,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.0111,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.0042, 8.1266, 8.0434, 7.9608,\n 7.8791, 7.7981, 7.7178, 7.8393, 7.9600, 7.8803, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.9151, 9.0257, 8.9502, 9.0601, 9.1694, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.4837, 9.5896, 9.5161, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.0848, 10.1855, 10.1149, 10.2151,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A squirrel runs around in circles.\nSentence 2: A squirrel is moving in circles.\nSimilarity score:", + "true_label": 4.400000095367432, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.4974, -0.5410, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.5864, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.6894, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.6234, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.5526, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.7559, 13.8447, 13.9332, 13.8564, 13.9446, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is tying his shoe.\nSentence 2: A man ties his shoe.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.4816, 3.7009, 3.9158, 4.1265, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.6568, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.0332, 4.9346, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.2338, 5.1444, 5.0562, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.3345, 5.2489, 5.1643, 5.0807, 5.2278, 5.3736, 5.5181, 5.4349,\n 5.3526, 5.4956, 5.4140, 5.5556, 5.6959, 5.8351, 5.7540, 5.6737,\n 5.5942, 5.7318, 5.8684, 6.0038, 6.1382, 6.2716, 6.1923, 6.3246,\n 6.4558, 6.3770, 6.5072, 6.4291, 6.5583, 6.6865, 6.6089, 6.5320,\n 6.4558, 6.5828, 6.7090, 6.6332, 6.7585, 6.6833, 6.8076, 6.7330,\n 6.6591, 6.7823, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.6667, 7.7831, 7.8988, 7.8253, 7.9403, 8.0546,\n 8.1683, 8.0952, 8.2082, 8.1356, 8.0636, 8.1758, 8.2874, 8.2158,\n 8.1448, 8.0742, 8.0042, 8.1150, 8.2252, 8.3349, 8.2652, 8.1960,\n 8.3050, 8.2362, 8.3446, 8.4523, 8.5595, 8.6662, 8.7724, 8.8780,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.3982, 9.3295, 9.4320, 9.3638,\n 9.2960, 9.3980, 9.4995, 9.6005, 9.7011, 9.8012, 9.9008, 9.8333,\n 9.9325, 9.8654, 9.7987, 9.7325, 9.8311, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A boy is singing and playing the piano.\nSentence 2: A boy is playing the piano.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -1.9291, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.6632, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.5847, 4.7610, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.2350, 5.1333, 5.2981, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.1996, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.6454, 6.7886, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.0211, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.1556, 7.0657, 6.9768, 6.8889, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.4625, 7.3786,\n 7.5056, 7.6315, 7.5484, 7.6734, 7.5910, 7.5094, 7.4286, 7.3485,\n 7.4724, 7.5955, 7.5161, 7.6383, 7.5595, 7.6808, 7.8014, 7.7232,\n 7.8429, 7.7653, 7.6883, 7.8072, 7.7308, 7.6551, 7.7732, 7.8905,\n 8.0070, 8.1229, 8.2381, 8.3526, 8.2772, 8.3910, 8.5041, 8.4293,\n 8.5417, 8.4674, 8.5792, 8.6903, 8.6165, 8.5433, 8.4706, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.6560, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.1932, 9.2990, 9.2276, 9.3328, 9.4375, 9.3665, 9.4707, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 9.8746,\n 9.9752, 9.9060, 10.0061, 9.9374, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.1970, 10.2949, 10.2273, 10.1602, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A dog is eating water melon.\nSentence 2: A dog is eating a piece of watermelon.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.2410, 9.1287, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.7376,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.9601, 10.0779, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.0102, 10.9259, 11.0309, 10.9473, 11.0517,\n 11.1556, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.9197, 12.0180, 11.9377,\n 12.0355, 11.9558, 11.8766, 11.7980, 11.8956, 11.8176, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.0493, 12.1447, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.9410, 13.0316, 13.1219, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is chopping broccoli.\nSentence 2: A woman is chopping broccoli with a knife.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.4778, 0.4286, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.4944, 0.4481, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998, 3.5032, 3.7033,\n 3.9001, 4.0937, 3.9837, 3.8759, 4.0657, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.9056, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.7025, 3.8730,\n 3.7849, 3.6979, 3.8657, 3.7796, 3.6947, 3.8600, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.8523, 3.7717, 3.6920, 3.8503, 3.7712,\n 3.6931, 3.8492, 3.7717, 3.6950, 3.8490, 3.7730, 3.6977, 3.8497,\n 3.7750, 3.7011, 3.8512, 3.7778, 3.7051, 3.8534, 3.7812, 3.7097,\n 3.8562, 3.7852, 3.7148, 3.8596, 3.7897, 3.7205, 3.8636, 3.7947,\n 3.7265, 3.8680, 3.8002, 3.7330, 3.6664, 3.8061, 3.9448, 4.0825,\n 4.0158, 3.9497, 3.8841, 3.8191, 3.9549, 3.8903, 4.0249, 4.1586,\n 4.2914, 4.4233, 4.5542, 4.6843, 4.6191, 4.5543, 4.6832, 4.8113,\n 4.9385, 5.0649, 5.0000, 4.9356, 5.0609, 5.1854, 5.1213, 5.2449,\n 5.1810, 5.3038, 5.4257, 5.3621, 5.4832, 5.6036, 5.7234, 5.6598,\n 5.7787, 5.8969, 6.0145, 6.1314, 6.2476, 6.3632, 6.2994, 6.2361,\n 6.3509, 6.4650, 6.5785, 6.6914, 6.6282, 6.5653, 6.6775, 6.7890,\n 6.9000, 6.8373, 6.9477, 6.8853, 6.9950, 7.1041, 7.2127, 7.1506,\n 7.2585, 7.3660, 7.4729, 7.5794, 7.6853, 7.7907, 7.7285, 7.6667,\n 7.7715, 7.8758, 7.9796, 8.0829, 8.0212, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is peeling a potato.\nSentence 2: A man peeled a potatoe.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.7833, -0.8268, -0.6868, -0.7303,\n -0.5915, -0.6351, -0.4974, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 2.1831, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 1.8728, 2.1054, 2.3333,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 2.2576, 2.1602,\n 2.0647, 1.9711, 1.8791, 1.7889, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.8257, 1.9973, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856,\n 1.8173, 1.9829, 1.9149, 1.8475, 2.0107, 2.1723, 2.1049, 2.0381,\n 1.9720, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.9640,\n 2.1182, 2.0548, 1.9920, 1.9298, 2.0817, 2.2323, 2.1700, 2.3190,\n 2.4669, 2.4045, 2.5508, 2.6961, 2.8402, 2.7775, 2.9202, 3.0619,\n 3.2025, 3.3420, 3.2788, 3.4171, 3.3542, 3.4913, 3.4286, 3.5645,\n 3.5022, 3.6369, 3.7707, 3.9036, 3.8411, 3.7791, 3.7176, 3.8490,\n 3.9795, 3.9181, 3.8571, 3.9865, 3.9258, 4.0541, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.3128, 4.2527, 4.1931, 4.3176, 4.4413, 4.5644,\n 4.5047, 4.6268, 4.7483, 4.8690, 4.9891, 4.9292, 4.8698, 4.8107,\n 4.9297, 4.8709, 4.9891, 4.9305, 5.0479, 4.9896, 4.9317, 5.0483,\n 5.1642, 5.1064, 5.0489, 4.9918, 5.1068, 5.2211, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.2775, 5.3898, 5.5015, 5.4451, 5.3891, 5.3333,\n 5.4442, 5.5545, 5.6643, 5.6085, 5.5532, 5.4981, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is playing a guitar.\nSentence 2: A man plays a guitar.\nSimilarity score:", + "true_label": 2.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.0684, 5.2485, 5.1326, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.0410, 5.9386,\n 5.8377, 5.7382, 5.8936, 6.0474, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.0211, 7.1591, 7.0662, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.5210, 8.6436,\n 8.5553, 8.6770, 8.5896, 8.5030, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.2554, 9.1735, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.0814, 10.0074, 10.1106, 10.0371, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.7090, 10.8064, 10.7363, 10.8333,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing tomato.\nSentence 2: A man is slicing onion.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.0849, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man swims underwater.\nSentence 2: A woman is swimming underwater.\nSimilarity score:", + "true_label": 2.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.9829, -2.0309, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.0412,\n -2.0841, -2.1268, -1.9673, -1.8091, -1.6521, -1.4963, -1.5404, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.4194, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.6241, -1.4881, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.4574, 11.3642, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.3204, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.8719, 11.9737, 11.8870, 11.8010,\n 11.9024, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.4065, 12.3263, 12.4223, 12.3428, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.2987, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.6153, 12.7082, 12.8007, 12.7248, 12.6494, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 3.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man and woman are talking.\nSentence 2: A man and woman is eating.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.1721, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, 0.0413, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A small dog is chasing a yoga ball.\nSentence 2: A dog is chasing a ball.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.7913, 1.7384, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.6378, 1.7592, 1.7128, 1.6667,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.8577,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.0928, 6.2505, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.7416, 6.8849, 6.7886, 6.9305, 7.0711,\n 6.9759, 6.8819, 7.0211, 7.1591, 7.0662, 6.9743, 6.8834, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.6328, 7.7598, 7.8859, 7.8003,\n 7.7155, 7.6315, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.3927, 8.5088, 8.6241, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.8752, 8.9851, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.3040, 9.2311, 9.1587, 9.0869,\n 9.0155, 9.1218, 9.2276, 9.3328, 9.4375, 9.5416, 9.4707, 9.4002,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.7367, 9.8373, 9.9374, 9.8691, 9.8012, 9.7337, 9.6667,\n 9.7663, 9.8654, 9.9641, 10.0624, 10.1602, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The men are playing cricket.\nSentence 2: The men are playing basketball.\nSimilarity score:", + "true_label": 2.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.7809, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.0479, -0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.7555, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.0741,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.2733, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.1119, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.7688, 9.8736, 9.9778,\n 10.0814, 10.0074, 10.1106, 10.0371, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.5470, 11.4765, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man rides off on a motorcycle.\nSentence 2: A man is riding on a motorcycle.\nSimilarity score:", + "true_label": 4.400000095367432, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.5680, 0.7201, 0.8709, 1.0206,\n 1.1692, 1.3166, 1.2611, 1.4071, 1.5519, 1.6958, 1.8385, 1.9803,\n 1.9237, 2.0642, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.3094,\n 2.4453, 2.3891, 2.5238, 2.4678, 2.4122, 2.3570, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.5886, 2.5343, 2.4803, 2.4267, 2.5560,\n 2.6846, 2.8124, 2.9394, 3.0657, 3.1912, 3.1368, 3.2614, 3.3853,\n 3.3309, 3.4539, 3.3996, 3.3457, 3.4677, 3.4140, 3.5351, 3.6556,\n 3.6019, 3.5485, 3.4954, 3.4427, 3.5619, 3.6805, 3.6277, 3.5753,\n 3.5232, 3.6407, 3.7576, 3.8739, 3.9896, 4.1048, 4.2193, 4.1667,\n 4.2805, 4.3938, 4.3412, 4.4538, 4.4014, 4.5134, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man is singing and playing a guitar.\nSimilarity score:", + "true_label": 3.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man talked on the telephone.\nSentence 2: The man is talking on the phone.\nSimilarity score:", + "true_label": 3.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.6825, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.5346, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.3893, 10.3154, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.3999, 10.3280, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.5410, 10.4713, 10.4021, 10.3333,\n 10.4312, 10.5286, 10.4603, 10.5573, 10.4893, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is fishing.\nSentence 2: A man is exercising.\nSimilarity score:", + "true_label": 0.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.0973, -1.1429, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.0974, -1.1380, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.5079, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.8860, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.4185, 9.5338, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.1076, 12.2033, 12.1260, 12.2214, 12.3163, 12.4109, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.7248, 12.8169, 12.7416, 12.8333,\n 12.9247, 13.0157, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is levitating.\nSentence 2: A man is talking.\nSimilarity score:", + "true_label": 0.800000011920929, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.8511, -0.8978, -0.7454, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.7048, -0.5620,\n -0.6068, -0.4652, -0.3246, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "154", + "Fraction of T in Greenlist": "77.4%", + "z-score": "17.1", + "p value": "1.31e-65", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.7597, 12.8653, 12.9704, 13.0748, 12.9668, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.0656, 13.1681, 13.2701, 13.1665, 13.0639,\n 12.9624, 13.0643, 13.1657, 13.2665, 13.3667, 13.4664, 13.5655, 13.6640,\n 13.7621, 13.8595, 13.9565, 14.0530, 14.1489, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.2390, 14.1429, 14.2374, 14.3314, 14.4250, 14.3302, 14.4234,\n 14.5162, 14.4225, 14.5150, 14.6071, 14.6987, 14.7899, 14.6976, 14.7885,\n 14.8790, 14.9691, 15.0588, 15.1481, 15.2369, 15.3254, 15.4135, 15.3230,\n 15.4108, 15.4983, 15.5853, 15.6720, 15.7584, 15.6692, 15.5808, 15.6670,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.3441,\n 16.4272, 16.5100, 16.5925, 16.6746, 16.7564, 16.8379, 16.9191, 16.8333,\n 16.9143, 16.9950, 17.0754, 16.9906, 16.9063, 16.9866, 17.0667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Two boys are driving.\nSentence 2: Two bays are dancing.\nSimilarity score:", + "true_label": 0.6000000238418579, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.7130, 1.9052,\n 1.8245, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.9127, 1.8378, 2.0158, 1.9413, 2.1167, 2.0426, 1.9695,\n 1.8972, 2.0692, 1.9973, 1.9262, 1.8559, 2.0247, 1.9548, 2.1213,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.2808, 1.4289,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.6398, 1.7823,\n 1.7264, 1.8676, 1.8119, 1.7566, 1.7018, 1.6473, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.7028, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.5777, 3.4743, 3.6662, 3.8552, 3.7528,\n 3.6522, 3.5533, 3.4562, 3.6407, 3.5447, 3.7264, 3.9056, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 4.2222, 4.1312,\n 4.2981, 4.4630, 4.3727, 4.2836, 4.1957, 4.1090, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.4182, 4.3339, 4.4901, 4.6448, 4.5611, 4.4783,\n 4.3966, 4.3158, 4.2359, 4.3879, 4.3086, 4.4590, 4.6079, 4.5291,\n 4.4511, 4.3740, 4.2977, 4.4444, 4.3687, 4.5140, 4.6580, 4.5826,\n 4.5079, 4.6503, 4.5762, 4.7173, 4.6437, 4.7834, 4.9221, 4.8488,\n 4.9862, 5.1225, 5.0496, 4.9774, 4.9058, 5.0406, 4.9695, 5.1031,\n 5.0325, 5.1650, 5.2965, 5.2262, 5.3567, 5.4863, 5.4163, 5.3468,\n 5.2779, 5.4062, 5.3378, 5.4650, 5.3970, 5.5233, 5.6488, 5.5811,\n 5.7056, 5.8294, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.5544, 5.6760, 5.7967, 5.7319, 5.6675, 5.6036, 5.5402, 5.6598,\n 5.5967, 5.7155, 5.8336, 5.7707, 5.7082, 5.6462, 5.5846, 5.7016,\n 5.6403, 5.7565, 5.8721, 5.8110, 5.9258, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.7987, 5.7394, 5.8525, 5.7934, 5.9059, 6.0177, 5.9588,\n 6.0700, 6.1807, 6.1219, 6.0635, 6.0054, 5.9477, 5.8904, 6.0000,\n 5.9429, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is riding on a horse.\nSentence 2: A girl is riding a horse.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.7746, 0.9258, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.3073, 0.4377, 0.3928, 0.5222,\n 0.6509, 0.6058, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.5396, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.6082, 11.5329, 11.6297, 11.7261, 11.6514, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.7169, 11.6441, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 12.0419, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is riding a bicycle.\nSentence 2: A monkey is riding a bike.\nSimilarity score:", + "true_label": 2.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.6508, -0.4988, -0.5466, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.0473, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.2596, 0.3884, 0.5164, 0.6437, 0.5991, 0.5548, 0.6810,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.9119, 0.8682, 0.8248, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "64.1%", + "z-score": "12.7", + "p value": "2.31e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.2525, 8.4017, 8.5491, 8.6948, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.2600, 9.3901, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.5311, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.3899, 11.4935, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.6283, 12.5495, 12.4713, 12.3935, 12.4880, 12.5820, 12.5049, 12.4283,\n 12.5221, 12.6153, 12.5394, 12.4638, 12.5568, 12.6494, 12.5745, 12.5000,\n 12.5923, 12.6841, 12.6102, 12.5367, 12.6283, 12.7195])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is slicing potatoes.\nSentence 2: A woman is peeling potato.\nSimilarity score:", + "true_label": 2.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 9.7778, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.0779, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.1621, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.6827, 11.7851, 11.6990, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.7735, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is peeling a potato.\nSentence 2: A man is slicing potato.\nSimilarity score:", + "true_label": 2.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.9%", + "z-score": "3.18", + "p value": "0.000727", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.8091, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.8962, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 1.8983, 1.8446, 1.9799, 2.1143, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 2.1125, 2.2406, 2.1892, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.3131, 2.4371,\n 2.5604, 2.5099, 2.6323, 2.7541, 2.7036, 2.8245, 2.9448, 2.8943,\n 2.8440, 2.9633, 3.0821, 3.0317, 2.9817, 2.9320, 3.0496, 3.1667,\n 3.1169, 3.0674, 3.1836])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.8766, 1.7942, 1.9870, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 1.9612, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.3448, 2.2678, 2.4423, 2.3658, 2.5378, 2.7080,\n 2.6316, 2.5560, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.3791, 2.3094, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.2884, 2.2222, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.3354, 2.2711, 2.2074, 2.3586, 2.2952, 2.4449, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.5508, 2.4887, 2.4271, 2.3660, 2.5103, 2.4495,\n 2.5925, 2.5318, 2.6735, 2.6131, 2.5532, 2.4938, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.5373, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.4122, 2.3570, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.5247, 2.4721, 2.4198, 2.5466, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.4597, 2.4099, 2.3603, 2.4822, 2.4327, 2.5538,\n 2.5044, 2.6247, 2.5754, 2.5265, 2.4778, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.5700, 2.6874, 2.6393, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + } + ], + "metrics": { + "pearson_corr_without_watermark": NaN, + "pearson_corr_with_watermark": -0.3385932975390179, + "spearman_corr_without_watermark": NaN, + "spearman_corr_with_watermark": -0.3389426353569145 + } + }, + "validation": { + "results": [ + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man with a hard hat is dancing.\nSentence 2: A man wearing a hard hat is dancing.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.3698, 0.5069, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.6437, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.8682, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.2674, 11.1810, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.5655, 11.4829, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.7787, 11.7000, 11.6220, 11.5444, 11.6425, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 11.9176,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.2068, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A young child is riding a horse.\nSentence 2: A child is riding a horse.\nSimilarity score:", + "true_label": 4.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -3.1013, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.8297, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.5466, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.3614, 4.5461, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.5970, 6.5008, 6.6454, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.4663, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.9303, 6.8414, 6.7536, 6.8889, 6.8019, 6.7159, 6.8500, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.8406, 7.9649, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.7610, 8.8778, 8.9940, 8.9113,\n 9.0267, 8.9448, 9.0595, 8.9783, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 9.0944, 9.2032, 9.1287,\n 9.0548, 9.1629, 9.0895, 9.0167, 9.1242, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.6130, 9.7167, 9.8198, 9.9224,\n 9.8510, 9.9531, 10.0547, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.3148, 10.2447, 10.3439, 10.4427, 10.5410, 10.6389, 10.5692, 10.5000,\n 10.4312, 10.5286, 10.4603, 10.3923, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is feeding a mouse to a snake.\nSentence 2: The man is feeding a mouse to the snake.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.1024, 10.2283, 10.1124, 9.9980, 10.1234, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.3257, 10.2222, 10.3411, 10.4592, 10.3571, 10.2562,\n 10.1564, 10.0577, 10.1754, 10.0779, 10.1948, 10.0984, 10.0029, 9.9085,\n 9.8150, 9.7224, 9.6307, 9.7473, 9.6566, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.4185, 9.5338, 9.4474, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.3284, 10.2486, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.5366, 10.6397, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.8170, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.0904, 11.1886, 11.2864, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is playing the guitar.\nSentence 2: A man is playing guitar.\nSimilarity score:", + "true_label": 2.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.7671, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is playing the flute.\nSentence 2: A man is playing a flute.\nSimilarity score:", + "true_label": 2.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is cutting an onion.\nSentence 2: A man is cutting onions.\nSimilarity score:", + "true_label": 2.615000009536743, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.9218, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.6667,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.0795, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 12.9491, 12.8680, 12.7876, 12.7077,\n 12.6283, 12.7226, 12.6439, 12.5657, 12.4880, 12.4109, 12.5049, 12.4283,\n 12.3523, 12.2767, 12.3705, 12.2954, 12.2209, 12.3143, 12.2403, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.2992, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is erasing a chalk board.\nSentence 2: The man is erasing the chalk board.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.0371, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 6.8876, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.1151, 11.0309, 10.9473, 11.0517,\n 10.9689, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.2987, 12.2214, 12.1447, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is carrying a boy.\nSentence 2: A woman is carrying her baby.\nSimilarity score:", + "true_label": 2.3329999446868896, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 2.6681, 2.8943, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.8823, 2.7757, 2.9856, 3.1918, 3.0861,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 3.2733, 3.1754,\n 3.3657, 3.5533, 3.4562, 3.6407, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730,\n 4.0415, 3.9530, 3.8657, 3.7796, 3.9452, 3.8600, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.2507, 4.1684, 4.0872, 4.2426,\n 4.1621, 4.3158, 4.4680, 4.3879, 4.5384, 4.4590, 4.6079, 4.5291,\n 4.4511, 4.3740, 4.5210, 4.4444, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.7252, 4.8666, 4.7916, 4.7173, 4.6437, 4.7834, 4.7104, 4.8488,\n 4.9862, 4.9135, 5.0496, 4.9774, 5.1123, 5.0406, 4.9695, 4.8990,\n 5.0325, 4.9624, 5.0948, 5.2262, 5.1564, 5.2868, 5.2175, 5.3468,\n 5.2779, 5.2096, 5.1419, 5.2699, 5.2025, 5.3295, 5.4557, 5.3886,\n 5.5138, 5.4471, 5.5714, 5.5051, 5.4393, 5.3740, 5.4971, 5.4322,\n 5.5544, 5.6760, 5.6112, 5.7319, 5.6675, 5.7874, 5.7234, 5.6598,\n 5.5967, 5.7155, 5.6527, 5.7707, 5.8880, 5.8254, 5.9420, 5.8797,\n 5.9956, 5.9336, 5.8721, 5.8110, 5.9258, 5.8650, 5.9792, 6.0927,\n 6.0321, 6.1449, 6.0846, 6.1968, 6.1367, 6.0770, 6.0177, 6.1290,\n 6.0700, 6.1807, 6.2908, 6.2319, 6.3414, 6.2828, 6.3917, 6.3333,\n 6.2753, 6.2177, 6.3258, 6.2684, 6.3758, 6.4828, 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Three men are playing guitars.\nSentence 2: Three men are on stage playing guitars.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.0647, 1.9711, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.2449, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.8755, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.2136, 1.3443, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.6737, 1.6262, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.7106, 13.6313, 13.7215, 13.6429, 13.7327, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 4.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman peels a potato.\nSentence 2: A woman is peeling a potato.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.2011, 2.4371, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823, 2.8868,\n 3.0793, 2.9848, 3.1741, 3.3607, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.1052, 3.2757, 3.4442, 3.6109, 3.5282, 3.4466,\n 3.3659, 3.2863, 3.2077, 3.1300, 3.2928, 3.2157, 3.3764, 3.5355,\n 3.4586, 3.3826, 3.3075, 3.2332, 3.3895, 3.5443, 3.6977, 3.6233,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.4130, 3.3428, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.6452, 3.5762, 3.7205, 3.8636, 4.0056,\n 3.9365, 3.8680, 3.8002, 3.7330, 3.6664, 3.6004, 3.7399, 3.6742,\n 3.8125, 3.9497, 3.8841, 3.8191, 3.9549, 3.8903, 4.0249, 4.1586,\n 4.2914, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 3.9107, 4.0415,\n 3.9795, 4.1092, 4.2381, 4.1763, 4.1150, 4.0541, 3.9936, 4.1210,\n 4.2475, 4.3733, 4.3128, 4.2527, 4.1931, 4.1338, 4.0750, 4.0166,\n 4.1406, 4.0825, 4.2056, 4.3280, 4.2699, 4.2122, 4.1549, 4.0980,\n 4.2191, 4.3395, 4.4593, 4.4023, 4.3456, 4.2893, 4.2334, 4.1779,\n 4.1226, 4.2409, 4.1859, 4.3033, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.4331, 4.5476, 4.4936, 4.4399, 4.3864, 4.3333,\n 4.2805, 4.2280, 4.3412, 4.2889, 4.4014, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: People are playing cricket.\nSentence 2: Men are playing cricket.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.9127, 1.8378, 1.7638, 1.6908, 1.8677, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.9973, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.0107, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.3552, 2.2884, 2.4444, 2.3779, 2.3120, 2.2468, 2.4004,\n 2.5527, 2.4874, 2.4227, 2.3586, 2.2952, 2.2323, 2.3817, 2.3190,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 2.0682, 2.2111, 2.3529, 2.2943, 2.2361, 2.3764,\n 2.3183, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 1.1251, 1.0788, 1.0328, 0.9870, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 6.8931,\n 7.0379, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.2104, 7.1152, 7.0211, 6.9282, 6.8364, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.6785, 7.8065, 7.9336, 7.8463, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.3827, 10.3065, 10.4097, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.2848, 11.2129, 11.3091, 11.2376, 11.1667,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man is playing a flute.\nSimilarity score:", + "true_label": 1.5829999446868896, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The cougar is chasing the bear.\nSentence 2: A cougar is chasing a bear.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.3244, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.2603, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.1111, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 7.8667, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 9.7738, 9.8995,\n 9.7897, 9.6813, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 10.8801, 10.9917, 11.1026, 11.0070, 11.1172, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.4286, 12.5289, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.1453,\n 13.2410, 13.1547, 13.2499, 13.3447, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.7293, 14.6473, 14.7348, 14.6534, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.6738, 14.7601, 14.6812, 14.6027, 14.6889, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man cut down a tree with an axe.\nSentence 2: A man chops down a tree with an axe.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.6830, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.6%", + "z-score": "11.2", + "p value": "1.26e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.7959, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.1218, 11.0521, 11.1475, 11.2424])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 3.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man is playing the guitar.\nSentence 2: A man is playing a guitar.\nSimilarity score:", + "true_label": 4.908999919891357, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.2904, 7.2058, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.0658, 6.9879, 6.9107, 6.8343, 6.9587, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.7823, 6.9048, 7.0265, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.5933, 7.7096, 7.8253, 7.7524, 7.8673,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.2479, 8.3595, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.5469, 8.4757, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.6603, 8.7676, 8.8744, 8.9806, 9.0863, 9.0164, 8.9469, 9.0520,\n 8.9830, 8.9145, 9.0190, 9.1230, 9.0549, 8.9872, 9.0906, 9.1936,\n 9.2960, 9.3980, 9.3306, 9.4321, 9.3651, 9.2986, 9.2324, 9.3333,\n 9.2676, 9.3680, 9.3026, 9.2376, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is finding something.\nSentence 2: A woman is slicing something.\nSimilarity score:", + "true_label": 0.800000011920929, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.3607, 1.3131, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.8766, 1.7942, 1.9870, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 1.9612, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.3448, 2.2678, 2.4423, 2.3658, 2.5378, 2.7080,\n 2.6316, 2.5560, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.3791, 2.3094, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.2884, 2.2222, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.3354, 2.2711, 2.2074, 2.3586, 2.2952, 2.4449, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.4887, 2.4271, 2.3660, 2.5103, 2.4495,\n 2.5925, 2.5318, 2.6735, 2.6131, 2.5532, 2.4938, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.5373, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.5802, 2.5238, 2.4678, 2.4122, 2.3570, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.5247, 2.4721, 2.4198, 2.3679, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.4597, 2.4099, 2.3603, 2.4822, 2.4327, 2.5538,\n 2.5044, 2.6247, 2.5754, 2.5265, 2.4778, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.5700, 2.6874, 2.6393, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The girl sang into a microphone.\nSentence 2: The lady sang into the microphone.\nSimilarity score:", + "true_label": 2.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.4606, 9.3678, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.3333, 10.4407, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.0004, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.3249, 11.4244, 11.3468, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.7901, 11.8849, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is climbing a rope.\nSentence 2: A man climbs a rope.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.4384,\n 12.3289, 12.4370, 12.5443, 12.4365, 12.3299, 12.4370, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.6508, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.4674, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.6707, 13.7663, 13.6742,\n 13.5827, 13.6781, 13.5876, 13.6826, 13.7772, 13.8713, 13.7818, 13.8756,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.3449, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.7113, 14.7998, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.9860, 15.0726, 14.9903, 15.0766, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.1712, 15.0909, 15.1761, 15.2609, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Kittens are eating food.\nSentence 2: Kittens are eating from dishes.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.1167, -1.9843, -2.0212, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.0494, 1.9245, 2.1831, 2.0605, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.4495, 2.6679, 2.8823, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.5942, 3.7905, 3.9837, 4.1740, 4.3614, 4.5461, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 6.8931,\n 6.7931, 6.6944, 6.5970, 6.7416, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.2354, 6.1470, 6.0596, 6.2008, 6.1143,\n 6.2541, 6.3928, 6.3070, 6.4444, 6.3595, 6.4957, 6.6308, 6.7648,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.5484, 7.6734, 7.5910, 7.5094, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.4373, 7.5595, 7.6808, 7.8014, 7.7232,\n 7.8429, 7.9619, 8.0801, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.4664, 8.5796, 8.5041, 8.4293,\n 8.5417, 8.4674, 8.5792, 8.5054, 8.4322, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.7278, 8.8364, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.1932, 9.2990, 9.2276, 9.1567, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.5369, 9.6394, 9.7415, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.7367, 9.8373, 9.9374, 10.0371, 10.1363, 10.0679, 10.0000,\n 10.0987, 10.0312, 10.1295, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is speaking.\nSentence 2: A man is spitting.\nSimilarity score:", + "true_label": 0.6359999775886536, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -1.0412, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.3531, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.5346, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.3827, 10.3065, 10.4097, 10.3341,\n 10.4367, 10.3617, 10.4638, 10.5654, 10.4909, 10.4170, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is cooking eggs.\nSentence 2: A woman is cooking something.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.7896, 0.7461, 0.8682, 0.9897, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man is playing a trumpet.\nSimilarity score:", + "true_label": 1.7139999866485596, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.7812, 5.6804,\n 5.8377, 5.7382, 5.6401, 5.7955, 5.6986, 5.6032, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.4663, 6.3768, 6.2883, 6.4283, 6.3408,\n 6.4795, 6.6171, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.9254, 8.0497, 7.9649, 7.8808, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.2413, 8.1602, 8.2793, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.6603,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 8.9502, 9.0601, 9.1694, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.6635, 9.5896, 9.5161, 9.4432,\n 9.5485, 9.4761, 9.5808, 9.5089, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.8510, 9.7800, 9.7095, 9.6394, 9.7415, 9.8431, 9.9442, 9.8746,\n 9.8054, 9.7367, 9.6684, 9.7690, 9.7011, 9.6336, 9.7337, 9.6667,\n 9.7663, 9.6996, 9.6334, 9.7325, 9.6666, 9.7653, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: An animal is chewing on something.\nSentence 2: An animal is chewing on a key chain.\nSimilarity score:", + "true_label": 3.200000047683716, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.0209, 11.9487, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a flute.\nSentence 2: A man is playing guitar.\nSimilarity score:", + "true_label": 2.1670000553131104, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.2725, 6.1820, 6.0927, 6.2354, 6.1470, 6.0596, 6.2008, 6.1143,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.4632, 6.3807, 6.5144, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.9237, 6.8439, 6.9714, 6.8922, 7.0187, 7.1443,\n 7.0658, 6.9879, 6.9107, 7.0353, 7.1590, 7.0823, 7.2051, 7.1291,\n 7.0537, 7.1755, 7.1007, 7.0265, 7.1474, 7.0737, 7.0007, 6.9282,\n 7.0481, 6.9762, 6.9048, 7.0238, 6.9529, 6.8825, 7.0006, 7.1181,\n 7.2348, 7.3508, 7.4662, 7.5809, 7.6950, 7.6246, 7.5548, 7.4855,\n 7.5988, 7.7114, 7.6424, 7.7544, 7.6859, 7.7971, 7.7291, 7.6615,\n 7.7720, 7.7048, 7.6381, 7.7480, 7.6816, 7.7908, 7.8995, 7.8335,\n 7.7679, 7.8759, 7.9833, 7.9181, 7.8533, 7.9601, 7.8956, 7.8316,\n 7.7679, 7.8740, 7.8107, 7.7478, 7.6853, 7.7907, 7.7285, 7.6667,\n 7.7715, 7.7099, 7.8142, 7.9179, 7.8567, 7.7958, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing soccer.\nSentence 2: A man is playing flute.\nSimilarity score:", + "true_label": 1.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.0474, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.0587, 6.2075, 6.1128, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.0219, 7.9336, 8.0598, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.0497, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.3691,\n 8.2858, 8.2032, 8.1214, 8.2413, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.5921, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.7451, 10.6722, 10.5998, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.6111, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.6944, 10.6256, 10.5573, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A girl is riding a horse.\nSentence 2: A girl is riding a bicycle.\nSimilarity score:", + "true_label": 1.9170000553131104, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.0553, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 7.8168, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.6148, 9.7224, 9.6456, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.9184, 9.8433, 9.9481, 10.0523, 9.9778,\n 10.0814, 10.0074, 10.1106, 10.0371, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is lifting weights in a garage.\nSentence 2: A man is lifting weights.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.7%", + "z-score": "13.2", + "p value": "3.75e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.7250, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 10.9669, 10.8542, 10.7429, 10.8616, 10.7518, 10.8699,\n 10.7616, 10.6547, 10.7722, 10.6667, 10.7835, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 12.8766, 12.7812, 12.8819, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.8017, 12.9011, 13.0000, 12.9085, 12.8179, 12.9165, 13.0146, 12.9249,\n 12.8359, 12.9337, 12.8456, 12.7581, 12.6713, 12.7690, 12.6830, 12.7802,\n 12.8769, 12.7918, 12.8881, 12.9840, 12.8997, 12.8160, 12.9116, 13.0067,\n 12.9238, 12.8414, 12.7597, 12.8546, 12.7735, 12.6930, 12.7876, 12.7077,\n 12.6283, 12.5495, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.2864, 13.2118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: An animal is biting a persons finger.\nSentence 2: A slow loris is biting a persons finger.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 1.0206,\n 1.1692, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.4662, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 1.1794, 1.3114, 1.2623, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is reading.\nSentence 2: A woman is kneading dough.\nSimilarity score:", + "true_label": 1.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.1644, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.8540, 0.8066, 0.9382, 0.8909,\n 1.0215, 1.1513, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 1.1461, 1.2700, 1.3933, 1.3474, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.3383, 5.2350, 5.1333, 5.2981, 5.1978, 5.3605, 5.5213, 5.4222,\n 5.3245, 5.2281, 5.1332, 5.2915, 5.4482, 5.6032, 5.7566, 5.9084,\n 5.8139, 5.7207, 5.6286, 5.5377, 5.6875, 5.5976, 5.7458, 5.8926,\n 5.8035, 5.7155, 5.6285, 5.5426, 5.6874, 5.8310, 5.9732, 6.1143,\n 6.2541, 6.1685, 6.0838, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.2459, 6.1644, 6.0837, 6.2183, 6.1382, 6.2716, 6.4040, 6.5354,\n 6.6658, 6.7952, 6.7155, 6.6365, 6.5583, 6.4807, 6.6089, 6.5320,\n 6.6591, 6.7854, 6.7090, 6.6332, 6.7585, 6.6833, 6.8076, 6.9310,\n 7.0537, 7.1755, 7.2966, 7.2217, 7.1474, 7.0737, 7.0007, 7.1207,\n 7.0481, 7.1673, 7.2857, 7.2136, 7.1421, 7.0711, 7.0006, 7.1181,\n 7.2348, 7.3508, 7.4662, 7.5809, 7.5106, 7.4409, 7.3717, 7.3030,\n 7.4168, 7.3485, 7.4616, 7.5740, 7.5061, 7.4386, 7.3717, 7.3051,\n 7.4167, 7.5277, 7.6381, 7.7480, 7.8572, 7.7908, 7.7249, 7.6594,\n 7.5944, 7.7028, 7.6381, 7.7460, 7.8533, 7.7889, 7.7249, 7.6613,\n 7.5981, 7.7047, 7.8107, 7.9162, 8.0212, 8.1258, 8.0627, 8.0000,\n 7.9377, 7.8758, 7.9796, 7.9179, 8.0212, 8.1240, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is crying.\nSentence 2: A woman is dancing.\nSimilarity score:", + "true_label": 0.6000000238418579, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 0.9661, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 1.1239,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 1.1169, 1.0690,\n 1.0215, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.2657, 1.3926,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.5298, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.9913, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 2.8830, 3.0796, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.4438, 3.3556, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 4.0166, 4.1761, 4.0931, 4.2507, 4.4066, 4.3241, 4.2426,\n 4.1621, 4.0825, 4.2359, 4.3879, 4.3086, 4.4590, 4.6079, 4.5291,\n 4.6765, 4.8226, 4.7442, 4.6667, 4.8111, 4.9543, 4.8772, 5.0190,\n 4.9424, 5.0829, 5.2223, 5.1461, 5.2842, 5.4212, 5.3455, 5.2705,\n 5.1962, 5.3316, 5.2578, 5.1848, 5.1123, 5.2463, 5.3793, 5.3072,\n 5.4391, 5.5701, 5.4983, 5.4272, 5.3567, 5.2868, 5.2175, 5.3468,\n 5.2779, 5.4062, 5.5336, 5.4650, 5.5915, 5.7171, 5.6488, 5.5811,\n 5.5138, 5.4471, 5.3810, 5.3153, 5.2501, 5.3740, 5.4971, 5.4322,\n 5.5544, 5.6760, 5.6112, 5.5470, 5.4832, 5.4199, 5.3571, 5.2947,\n 5.2327, 5.3526, 5.4718, 5.4100, 5.5284, 5.6462, 5.5846, 5.5234,\n 5.6403, 5.5794, 5.5189, 5.6349, 5.5747, 5.6899, 5.8046, 5.7446,\n 5.8585, 5.9718, 5.9120, 5.8525, 5.7934, 5.9059, 5.8470, 5.7885,\n 5.7304, 5.8420, 5.9530, 5.8951, 6.0054, 6.1153, 6.0575, 6.0000,\n 5.9429, 5.8861, 5.9950, 6.1034, 6.0468, 6.1546, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The lady cracked an egg into a bowl.\nSentence 2: The man is cracking eggs into a bowl.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.6654, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 7.8512, 7.6996, 7.5514, 7.7152,\n 7.5707, 7.4294, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.3138, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.5553, 8.4679, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 8.9612, 8.8778, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.7831, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.8874, 9.0000, 8.9221, 9.0340, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.7072, 9.8131, 9.9184, 10.0231, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 11.0165, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A band is performing on a stage.\nSentence 2: A band is playing onstage.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.4201, -0.2791, -0.3246, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 7.9853, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 8.9496, 9.0680, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.4533, 11.5489, 11.6441, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Elephants are walking down a trail.\nSentence 2: A herd of elephants are walking along a trail.\nSimilarity score:", + "true_label": 4.599999904632568, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.5927, -0.6333, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.4312, 10.3409, 10.4524, 10.3630, 10.2743, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.9091, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.8170, 10.9176, 10.8421, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.2864, 11.2126, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man plays the guitar.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.2904, 7.2058, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.0658, 6.9879, 6.9107, 6.8343, 6.9587, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.7823, 6.9048, 7.0265, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.5933, 7.7096, 7.8253, 7.7524, 7.8673,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.2479, 8.3595, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.5469, 8.4757, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.6603, 8.7676, 8.8744, 8.9806, 9.0863, 9.0164, 8.9469, 9.0520,\n 8.9830, 8.9145, 9.0190, 9.1230, 9.0549, 8.9872, 9.0906, 9.1936,\n 9.2960, 9.3980, 9.3306, 9.4321, 9.3651, 9.2986, 9.2324, 9.3333,\n 9.2676, 9.3680, 9.3026, 9.2376, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man sitting on the floor plays a guitar.\nSentence 2: A man sitting on the floor in a room is strumming a guitar.\nSimilarity score:", + "true_label": 4.800000190734863, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.6471, 9.5638, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.9249, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.1423, 10.0631, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.1469, 10.0701, 9.9940, 9.9184, 10.0231, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A puppy plays with a plastic container.\nSentence 2: The dog is playing with a plastic container.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.6433, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.2372, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.0497, 7.9649, 7.8808, 8.0042, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.4423, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.2790, 10.2029, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.3154, 10.4170, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.5998, 10.5278, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man plays the violin.\nSentence 2: A man is playing violin.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.0401, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man plays the piano.\nSentence 2: A man is playing a piano.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is eating food.\nSentence 2: A man is eating something.\nSimilarity score:", + "true_label": 4.199999809265137, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.5133, 1.4382, 1.3641, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.8040, 0.7509, 0.6983, 0.8447, 0.9901,\n 0.9372, 1.0812, 1.0284, 1.1711, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.9520, 0.9062, 1.0328, 0.9870, 1.1127, 1.0670, 1.1918,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.6977, 1.6013, 1.5068, 1.7285, 1.6348, 1.8516,\n 2.0647, 1.9711, 1.8791, 2.0870, 2.2916, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.5205, 2.7107, 2.6222, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.2717, 3.4429, 3.6122, 3.5277, 3.6947, 3.8600, 4.0234, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.5747, 4.7296, 4.6448, 4.7980, 4.7140,\n 4.6311, 4.7823, 4.9322, 5.0807, 5.2278, 5.3736, 5.5181, 5.6614,\n 5.5780, 5.7199, 5.8605, 5.7778, 5.6959, 5.8351, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.9237, 6.8439, 6.9714, 7.0980, 7.0187, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.0353, 7.1590, 7.2818, 7.4039, 7.5251,\n 7.6456, 7.7653, 7.8842, 8.0024, 7.9253, 7.8489, 7.9663, 8.0829,\n 8.0070, 8.1229, 8.2381, 8.1628, 8.0880, 8.0139, 8.1282, 8.2420,\n 8.3550, 8.2813, 8.3937, 8.3205, 8.4322, 8.3595, 8.4706, 8.3984,\n 8.5088, 8.6186, 8.5469, 8.4757, 8.5848, 8.5141, 8.4439, 8.3742,\n 8.4826, 8.5905, 8.5212, 8.6284, 8.5595, 8.6662, 8.7724, 8.7039,\n 8.8094, 8.7414, 8.6738, 8.6066, 8.5399, 8.4736, 8.5785, 8.6828,\n 8.6169, 8.5513, 8.6551, 8.7584, 8.6932, 8.6284, 8.7311, 8.6667,\n 8.6026, 8.5390, 8.4757, 8.4128, 8.5148, 8.6164, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a basketball.\nSentence 2: A man is playing a piano.\nSimilarity score:", + "true_label": 1.399999976158142, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.6885, -0.7295, -0.7703, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.1012, 6.2517, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.2601, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.2466,\n 7.1556, 7.0657, 7.2001, 7.1111, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.3758, 7.5048, 7.4193, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.5484, 7.6734, 7.7976, 7.7152, 7.8384, 7.9608,\n 8.0824, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.5824, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.2704, 9.1970, 9.3040, 9.4103, 9.5161, 9.4432,\n 9.5485, 9.6532, 9.5808, 9.6850, 9.7886, 9.8918, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.0547, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.5410, 10.4713, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A toddler walks down a hallway.\nSentence 2: A little girl is walking down a hallway.\nSimilarity score:", + "true_label": 3.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.6888, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.5618, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.2827, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.1176, 8.2488, 8.1524,\n 8.0571, 7.9630, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.2486, 10.1695, 10.0910, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.8170, 10.7415, 10.6665, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.7714, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.2129, 11.1415, 11.0705, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A plane is landing.\nSentence 2: A animated airplane is landing.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -1.8385, -1.6690, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.7041, -1.7488, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.5396,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.8145, -1.8511, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.9010, 5.1711,\n 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855, 5.4271, 5.6614, 5.4444,\n 5.2372, 5.4678, 5.2705, 5.0811, 5.3072, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.4322, 5.2697, 5.4772, 5.3199, 5.5234, 5.7229, 5.5705, 5.4222,\n 5.2778, 5.1371, 5.3333, 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.1326, 5.3100,\n 5.1962, 5.0844, 5.2590, 5.4312, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.4610, 5.3605, 5.5213, 5.6804, 5.5811,\n 5.7382, 5.8936, 5.7955, 5.6986, 5.6032, 5.7566, 5.9084, 5.8139, 5.9641,\n 6.1128, 6.0193, 5.9270, 6.0740, 5.9827, 5.8926, 5.8035, 5.9488, 6.0927,\n 6.0044, 5.9172, 6.0596, 5.9732, 5.8878, 5.8034, 5.9442, 6.0838, 6.0000,\n 5.9171, 5.8351, 5.7540, 5.6737, 5.5942, 5.7318, 5.8684, 5.7894, 5.7112,\n 5.6338, 5.7689, 5.9029, 5.8260, 5.9589, 6.0908, 6.0143, 5.9386, 5.8635,\n 5.7892, 5.7155, 5.6424, 5.7726, 5.9019, 5.8292, 5.7572, 5.6858, 5.8138,\n 5.9409, 5.8698, 5.9960, 6.1213, 6.0506, 5.9805, 6.1047, 6.0351, 5.9660,\n 5.8974, 6.0205, 6.1429, 6.0746, 6.0069, 5.9397, 6.0609, 5.9941, 5.9279,\n 6.0481, 6.1677, 6.1017, 6.0362, 6.1548, 6.2728, 6.3901, 6.3247, 6.4413,\n 6.5571, 6.4920, 6.4274, 6.3632, 6.2994, 6.2361, 6.1732, 6.2879, 6.4019,\n 6.3392, 6.2770, 6.2152, 6.1537, 6.0927, 6.0321, 6.1449, 6.2572, 6.1968,\n 6.1367, 6.0770, 6.1884, 6.2993, 6.2398, 6.3500, 6.4597, 6.4004, 6.3414,\n 6.4504, 6.5588, 6.6667, 6.6078, 6.7151, 6.8219, 6.7632, 6.7049, 6.6469,\n 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A panda is climbing.\nSentence 2: A man is climbing a rope.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.9612, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.6166, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -1.8352, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.4563, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.8204, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.6802, 11.6016, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.4356, 11.3608, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.6217, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 11.9487, 12.0419, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Kids are dancing on stage.\nSentence 2: Some people are dancing on stage.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.7392, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.4335, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.3544, 10.4596, 10.5642, 10.4852, 10.4067,\n 10.3289, 10.2516, 10.1749, 10.0987, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.0074, 9.9340, 9.8611, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 10.8505, 10.7795, 10.8770, 10.8064, 10.7363, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man plays a guitar.\nSentence 2: A man plays the piano.\nSimilarity score:", + "true_label": 1.399999976158142, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.2590, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.3284, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.9773, 10.8984, 10.8200, 10.9220, 10.8443, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.8571, 11.9534, 12.0493, 11.9730, 12.0685, 11.9928, 12.0878,\n 12.0127, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing a meat.\nSentence 2: A woman is singing on stage.\nSimilarity score:", + "true_label": 0.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.8520, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.5507, 0.6983, 0.6460, 0.7921,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.9382, 0.8909,\n 1.0215, 1.1513, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.3019, 1.4241, 1.5457, 1.5000,\n 1.6208, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.0342, 9.9524, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.4244, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is taking a picture.\nSentence 2: A man is playing a guitar.\nSimilarity score:", + "true_label": 0.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.4474, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 10.8200, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man is riding a horse.\nSentence 2: A woman is using a hoe.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is drilling a hole in a board.\nSentence 2: Someone is drilling a hole in a piece of wood.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 1.0596, 0.9869, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.6222, 1.7767, 1.9298, 2.0817, 2.0197, 2.1700, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.0739, 2.0140, 1.9545, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 2.0101, 1.9524, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 2.0078, 1.9518, 1.8962, 1.8411, 1.9795, 1.9245,\n 2.0617, 2.0068, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 2.1401, 2.0868, 2.2188, 2.1656, 2.1128, 2.0604, 2.0083,\n 1.9566, 2.0866, 2.0350, 2.1640, 2.1125, 2.0613, 2.1892, 2.1381,\n 2.2650, 2.3912, 2.5166, 2.4653, 2.5898, 2.5386, 2.4877, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.5319, 2.6534, 2.6034, 2.5538,\n 2.5044, 2.4553, 2.4065, 2.5265, 2.4778, 2.4294, 2.5483, 2.5000,\n 2.6182, 2.5700, 2.5220, 2.4744, 2.4269, 2.3798, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.2178, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.1176, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.1119, 9.0340, 8.9567, 9.0679, 8.9912,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.1694, 9.0944, 9.0200, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.6130, 9.5416, 9.4707, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.6394, 9.7415, 9.6719, 9.6028, 9.5341,\n 9.4658, 9.3980, 9.4995, 9.6005, 9.7011, 9.6336, 9.5666, 9.6667,\n 9.7663, 9.6996, 9.6334, 9.5675, 9.6666, 9.6011, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is fitting silencer on a pistol.\nSentence 2: A man is adding a silencer to a gun.\nSimilarity score:", + "true_label": 4.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.3365, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.5101, -0.3698, -0.2304, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.1340, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.1684, 0.1260, 0.0838, 0.2089, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.3578, 6.5137, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.7006, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.0211, 6.9282, 6.8364, 6.7456, 6.8834, 7.0201,\n 6.9303, 7.0657, 6.9768, 7.1111, 7.2443, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.7192, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.0882, 8.0042, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.3550, 8.2760, 8.1976, 8.1198, 8.2365, 8.3525, 8.2754,\n 8.3906, 8.5052, 8.4286, 8.5424, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.8008, 8.7270, 8.8369, 8.7636,\n 8.6908, 8.6186, 8.7278, 8.8364, 8.9444, 8.8726, 8.9800, 8.9087,\n 9.0155, 8.9447, 8.8744, 8.8045, 8.7351, 8.8413, 8.9469, 8.8780,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.2265, 9.1584, 9.0906, 9.0233,\n 9.1262, 9.2287, 9.3306, 9.4321, 9.5331, 9.6336, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.7987, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Two little girls are talking on the phone.\nSentence 2: A little girl is walking down the street.\nSimilarity score:", + "true_label": 0.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -0.9115, -0.9584, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 2.3238, 2.5538, 2.4422, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 3.1787, 3.3729, 3.2733, 3.1754,\n 3.0793, 2.9848, 3.1741, 3.0806, 3.2667, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.5301, 3.4427, 3.3566,\n 3.5283, 3.4429, 3.6122, 3.7796, 3.9452, 3.8600, 4.0234, 4.1851,\n 4.1003, 4.0166, 3.9340, 3.8523, 4.0112, 3.9302, 3.8503, 3.7712,\n 3.6931, 3.8492, 3.7717, 3.9260, 4.0788, 4.0016, 3.9253, 3.8497,\n 3.7750, 3.9254, 3.8512, 4.0000, 4.1475, 4.0736, 4.0004, 4.1461,\n 4.2907, 4.4341, 4.3609, 4.5029, 4.4302, 4.3583, 4.2870, 4.2164,\n 4.1464, 4.2862, 4.2167, 4.1478, 4.0795, 4.0119, 4.1498, 4.0825,\n 4.2191, 4.3548, 4.2877, 4.2212, 4.1552, 4.0898, 4.2237, 4.1586,\n 4.2914, 4.4233, 4.3583, 4.2940, 4.4246, 4.3605, 4.4901, 4.6188,\n 4.5549, 4.6826, 4.8095, 4.7458, 4.6825, 4.6198, 4.5575, 4.6829,\n 4.6209, 4.5594, 4.4983, 4.4376, 4.5617, 4.5013, 4.6245, 4.7469,\n 4.6867, 4.6268, 4.5674, 4.5083, 4.6295, 4.5707, 4.6911, 4.8107,\n 4.7520, 4.6938, 4.6359, 4.5783, 4.6968, 4.8146, 4.7572, 4.8742,\n 4.9906, 4.9333, 4.8763, 4.8197, 4.7635, 4.8787, 4.8227, 4.7670,\n 4.7117, 4.6567, 4.7709, 4.7161, 4.8295, 4.9425, 4.8877, 4.8333,\n 4.7792, 4.8913, 5.0027, 5.1137, 5.0595, 5.1698, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Someone is stirring chili in a kettle.\nSentence 2: Someone is stirring a pot of chili with a spoon.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.2%", + "z-score": "-3.2", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.8887, -2.9241, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.1674, -3.2004])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094, 2.6605, 2.9938,\n 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712, 4.0415, 3.8497, 3.6667,\n 3.9279, 4.1812, 4.0056, 4.2515, 4.0825, 4.3217, 4.1586, 4.3916, 4.2339,\n 4.4610, 4.3083, 4.5301, 4.7469, 4.9592, 4.8107, 4.6664, 4.5260, 4.7336,\n 4.5968, 4.8003, 4.6667, 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712,\n 5.0602, 5.2463, 5.1236, 5.0034, 4.8857, 4.7703, 4.9528, 5.1326, 5.0190,\n 5.1962, 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.7242, 5.8835, 5.7812, 5.6804, 5.5811,\n 5.7382, 5.8936, 6.0474, 5.9491, 6.1012, 6.0041, 6.1546, 6.0587, 5.9641,\n 5.8707, 5.7785, 5.9270, 6.0740, 5.9827, 5.8926, 6.0380, 6.1820, 6.0927,\n 6.2354, 6.1470, 6.2883, 6.2008, 6.1143, 6.0288, 5.9442, 6.0838, 6.2222,\n 6.1383, 6.2755, 6.1924, 6.3283, 6.4632, 6.5970, 6.5144, 6.6471, 6.7788,\n 6.6968, 6.8274, 6.7462, 6.8757, 7.0043, 6.9237, 6.8439, 6.9714, 7.0980,\n 7.2236, 7.1443, 7.2691, 7.3930, 7.5161, 7.6383, 7.5595, 7.6808, 7.8014,\n 7.7232, 7.6456, 7.7653, 7.6883, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.6667, 7.7831, 7.8988, 8.0139, 8.1282, 8.2420, 8.3550,\n 8.4674, 8.3937, 8.3205, 8.2479, 8.3595, 8.2874, 8.3984, 8.5088, 8.4371,\n 8.5469, 8.4757, 8.5848, 8.6933, 8.6226, 8.7305, 8.8379, 8.7676, 8.8744,\n 8.9806, 8.9107, 9.0164, 8.9469, 9.0520, 8.9830, 9.0876, 9.1916, 9.2952,\n 9.3982, 9.5007, 9.4320, 9.3638, 9.4658, 9.3980, 9.3306, 9.4321, 9.5331,\n 9.4661, 9.5666, 9.6667, 9.6000, 9.5338, 9.4680, 9.5675, 9.6666, 9.6011,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The woman is cracking eggs into a bowl.\nSentence 2: The lady broke raw eggs into a bowl.\nSimilarity score:", + "true_label": 4.800000190734863, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.3238, -2.3660, -2.2030, -2.0412,\n -2.0841, -2.1268, -1.9673, -1.8091, -1.8523, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.5396,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.4194, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.5377, -1.5752, -1.6125, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.1461, 4.0415,\n 3.9386, 3.8376, 4.0205, 3.9208, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.3333, 4.2385, 4.1451, 4.3146, 4.2222, 4.1312,\n 4.0415, 4.2080, 4.3727, 4.2836, 4.1957, 4.3580, 4.2710, 4.4313,\n 4.3451, 4.5035, 4.6603, 4.5747, 4.7296, 4.8830, 4.7980, 4.7140,\n 4.8655, 4.7823, 4.9322, 4.8497, 4.9980, 5.1450, 5.0630, 5.2085,\n 5.3526, 5.2713, 5.1908, 5.1111, 5.0323, 5.1745, 5.0964, 5.2372,\n 5.3769, 5.2992, 5.4377, 5.5750, 5.4977, 5.4212, 5.3455, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.3189, 5.4521, 5.3793, 5.3072,\n 5.2358, 5.1650, 5.2965, 5.2262, 5.3567, 5.4863, 5.4163, 5.5448,\n 5.6725, 5.6028, 5.5336, 5.4650, 5.3970, 5.5233, 5.4557, 5.5811,\n 5.7056, 5.6383, 5.7619, 5.8848, 5.8177, 5.7511, 5.8730, 5.8068,\n 5.9279, 5.8621, 5.7967, 5.7319, 5.8519, 5.9711, 5.9065, 5.8424,\n 5.7787, 5.8969, 6.0145, 5.9510, 5.8880, 5.8254, 5.9420, 6.0579,\n 5.9956, 5.9336, 6.0487, 5.9871, 6.1014, 6.0401, 5.9792, 5.9186,\n 6.0321, 6.1449, 6.0846, 6.0246, 5.9651, 5.9059, 6.0177, 5.9588,\n 5.9002, 5.8420, 5.9530, 6.0635, 6.0054, 5.9477, 5.8904, 5.8333,\n 5.9429, 5.8861, 5.8296, 5.7735, 5.8822, 5.9905, 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman puts flour on a piece of meat.\nSentence 2: A woman is putting flour onto some meat.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.5642, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.6082, 11.5329, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.1468, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is talking on a phone.\nSentence 2: A man is singing to a woman.\nSimilarity score:", + "true_label": 0.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.4174, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.5740, 0.5283, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.5621, 2.7757, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.0817, 3.2796, 3.4743, 3.6662, 3.8552, 3.7528,\n 3.6522, 3.5533, 3.7383, 3.9208, 4.1008, 4.2784, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.1978, 5.3605, 5.2615, 5.1640,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.4482, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.4480, 5.5976, 5.5088, 5.4212,\n 5.3345, 5.4822, 5.6285, 5.7735, 5.6874, 5.6023, 5.7457, 5.8878,\n 5.8034, 5.9442, 5.8605, 5.7778, 5.6959, 5.8351, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.5144, 6.6471, 6.7788, 6.9094, 6.8274, 6.9570,\n 7.0857, 7.0043, 7.1319, 7.0513, 7.1779, 7.3037, 7.2236, 7.3485,\n 7.2691, 7.1904, 7.3143, 7.4373, 7.3592, 7.4813, 7.6026, 7.5251,\n 7.6456, 7.5687, 7.4924, 7.4168, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 7.8571, 7.9729, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.1683, 8.0952, 8.2082, 8.3205, 8.2479, 8.3595, 8.4706, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.8364, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.6850, 9.6130, 9.7167, 9.6452, 9.5743,\n 9.6774, 9.7800, 9.8821, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 9.9752, 10.0753, 10.0061, 9.9374, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.6256, 10.7222, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing an electronic keyboard.\nSentence 2: A man is playing a flute.\nSimilarity score:", + "true_label": 1.2000000476837158, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.4581, 11.3837, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A man is eating a banana.\nSimilarity score:", + "true_label": 0.6000000238418579, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.7896, 0.7461, 0.8682, 0.9897, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.7812, 5.6804,\n 5.8377, 5.7382, 5.6401, 5.7955, 5.6986, 5.6032, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.3248, 6.4663, 6.3768, 6.2883, 6.4283, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.5807, 6.4957, 6.6308, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.7788, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.2134, 7.1319, 7.2587, 7.3845, 7.5094, 7.6335, 7.5526,\n 7.4724, 7.3930, 7.3143, 7.4373, 7.5595, 7.4813, 7.6026, 7.7232,\n 7.8429, 7.7653, 7.6883, 7.8072, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.8988, 8.0139, 8.1282, 8.0546,\n 8.1683, 8.0952, 8.2082, 8.3205, 8.2479, 8.3595, 8.2874, 8.3984,\n 8.5088, 8.4371, 8.5469, 8.4757, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.9447, 9.0510, 8.9806, 8.9107, 9.0164, 9.1215, 9.2261,\n 9.1566, 9.0876, 9.1916, 9.2952, 9.2265, 9.3295, 9.4320, 9.5341,\n 9.4658, 9.5673, 9.4995, 9.6005, 9.7011, 9.8012, 9.7337, 9.6667,\n 9.7663, 9.8654, 9.9641, 9.8974, 9.8311, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is jumping into water.\nSentence 2: A man is cutting paper.\nSimilarity score:", + "true_label": 0.800000011920929, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.5627, 2.4351, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.6681, 2.5538, 2.7791, 3.0000,\n 3.2167, 3.1027, 2.9913, 2.8823, 2.7757, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.2796, 3.4743, 3.3729, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 3.0806, 3.2667, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.5301, 3.4427, 3.3566,\n 3.2717, 3.4429, 3.3587, 3.2757, 3.1937, 3.3619, 3.2806, 3.4466,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.6133, 3.5355,\n 3.4586, 3.3826, 3.5396, 3.4641, 3.6193, 3.5443, 3.6977, 3.6233,\n 3.7750, 3.7011, 3.6279, 3.5556, 3.4839, 3.4130, 3.5620, 3.4915,\n 3.6389, 3.5689, 3.7148, 3.6452, 3.5762, 3.5079, 3.4402, 3.3731,\n 3.5166, 3.4499, 3.5920, 3.5256, 3.6664, 3.6004, 3.5350, 3.4701,\n 3.4058, 3.3420, 3.4806, 3.4171, 3.5544, 3.4913, 3.6274, 3.5645,\n 3.5022, 3.4403, 3.3789, 3.3181, 3.4521, 3.3915, 3.5245, 3.4641,\n 3.5960, 3.5359, 3.4762, 3.4170, 3.5474, 3.4884, 3.6178, 3.5590,\n 3.6874, 3.6289, 3.7563, 3.6980, 3.6401, 3.5827, 3.5256, 3.6515,\n 3.7766, 3.7196, 3.8438, 3.7870, 3.9104, 3.8538, 3.7975, 3.7417,\n 3.8638, 3.8081, 3.9294, 3.8740, 3.9945, 3.9392, 4.0589, 4.0038,\n 3.9491, 3.8947, 3.8406, 3.7869, 3.9052, 3.8516, 3.9691, 3.9158,\n 4.0325, 3.9793, 3.9265, 3.8739, 3.8216, 3.7697, 3.8851, 3.8333,\n 3.9481, 3.8964, 4.0105, 3.9590, 3.9078, 3.8569, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a guitar.\nSentence 2: A guy is playing an instrument.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.3616, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.1746, 13.0956, 13.0171, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.2429, 13.1667,\n 13.0910, 13.1815, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A young man is playing the piano.\nSentence 2: A woman is peeling a prawn.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.2129, 5.1065, 5.0019, 5.1711,\n 5.0680, 4.9666, 5.1333, 5.2981, 5.1978, 5.3605, 5.5213, 5.4222,\n 5.5811, 5.4832, 5.6401, 5.7955, 5.6986, 5.6032, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.5569, 6.6973, 6.6066, 6.5169, 6.6559, 6.5672,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.5807, 6.7159, 6.8500, 6.9830,\n 7.1149, 7.0296, 6.9451, 6.8615, 6.9923, 6.9094, 7.0391, 7.1678,\n 7.0857, 7.2134, 7.1319, 7.2587, 7.1779, 7.3037, 7.2236, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.6383, 7.5595, 7.6808, 7.6026, 7.5251,\n 7.6456, 7.5687, 7.4924, 7.4168, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 8.0476, 7.9729, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.1683, 8.2813, 8.3937, 8.3205, 8.2479, 8.1758, 8.2874, 8.3984,\n 8.3268, 8.4371, 8.5469, 8.6560, 8.5848, 8.6933, 8.6226, 8.7305,\n 8.8379, 8.9447, 8.8744, 8.8045, 8.7351, 8.8413, 8.7724, 8.7039,\n 8.6359, 8.5683, 8.5012, 8.6066, 8.7116, 8.8160, 8.7492, 8.8531,\n 8.7867, 8.8900, 8.8240, 8.9268, 9.0292, 9.1310, 9.2324, 9.3333,\n 9.4338, 9.3680, 9.4680, 9.5675, 9.5021, 9.6011, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is mixing ingrediants.\nSentence 2: A woman is mixing food in a bowl.\nSimilarity score:", + "true_label": 3.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.2982, -1.3377, -1.2049, -1.2445, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.0777, -0.9509, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "85", + "Fraction of T in Greenlist": "42.7%", + "z-score": "5.77", + "p value": "3.95e-09", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.4293, 3.3147, 3.2026, 3.0929, 2.9856, 3.1918, 3.0861,\n 3.2883, 3.4873, 3.3824, 3.2796, 3.1787, 3.0796, 3.2733, 3.1754,\n 3.3657, 3.5533, 3.4562, 3.3607, 3.2667, 3.1743, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.7849, 3.9530, 4.1192, 4.0316, 3.9452, 3.8600, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.9340, 4.0931, 4.0112, 3.9302, 3.8503, 3.7712,\n 3.9276, 3.8492, 4.0038, 4.1569, 4.0788, 4.0016, 3.9253, 3.8497,\n 4.0004, 3.9254, 4.0745, 4.2222, 4.1475, 4.0736, 4.2196, 4.1461,\n 4.2907, 4.2178, 4.3609, 4.5029, 4.6437, 4.5708, 4.4987, 4.4272,\n 4.5663, 4.4953, 4.6332, 4.7700, 4.6992, 4.8348, 4.9695, 5.1031,\n 5.0325, 4.9624, 4.8930, 4.8242, 4.7559, 4.8878, 4.8200, 4.9507,\n 5.0806, 5.0130, 4.9460, 4.8795, 4.8135, 4.9419, 4.8763, 5.0037,\n 5.1303, 5.0649, 5.0000, 4.9356, 4.8717, 4.9969, 4.9333, 5.0576,\n 5.1810, 5.1177, 5.0548, 5.1772, 5.1146, 5.2362, 5.1739, 5.2947,\n 5.4147, 5.5340, 5.4718, 5.4100, 5.3487, 5.2877, 5.2272, 5.3452,\n 5.2850, 5.4023, 5.5189, 5.4588, 5.3991, 5.3398, 5.2809, 5.3964,\n 5.3377, 5.4526, 5.5668, 5.5082, 5.4501, 5.5635, 5.5056, 5.6183,\n 5.5606, 5.6727, 5.7841, 5.8951, 5.8375, 5.7802, 5.7233, 5.6667,\n 5.6104, 5.7203, 5.6643, 5.7735, 5.8822, 5.8263, 5.7707])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Someone is shredding cabbage leaves with a knife.\nSentence 2: Someone is chopping some cabbage leaves.\nSimilarity score:", + "true_label": 4.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.2170, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, 0.0838, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.0494, 1.9245, 1.8034, 2.0605, 2.3113, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.0123, 2.8943, 2.7791, 3.0000,\n 2.8868, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.5777, 3.7700, 3.6662, 3.8552, 4.0415,\n 3.9386, 3.8376, 3.7383, 3.6407, 3.8228, 3.7264, 3.9056, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.9624, 4.1312,\n 4.0415, 3.9530, 4.1192, 4.0316, 3.9452, 4.1090, 4.2710, 4.4313,\n 4.5899, 4.7469, 4.9023, 4.8154, 4.7296, 4.6448, 4.5611, 4.4783,\n 4.3966, 4.5491, 4.7001, 4.8497, 4.9980, 5.1450, 5.2906, 5.2085,\n 5.1273, 5.0469, 4.9675, 4.8889, 4.8111, 4.9543, 5.0964, 5.2372,\n 5.3769, 5.5155, 5.6530, 5.5750, 5.4977, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.5407, 5.6743, 5.8069, 5.9386, 6.0693, 6.1990, 6.1237,\n 6.0491, 5.9752, 6.1036, 6.0302, 5.9575, 6.0848, 6.2113, 6.3369,\n 6.4618, 6.5857, 6.7089, 6.6361, 6.5639, 6.4923, 6.4213, 6.5433,\n 6.4728, 6.5939, 6.7143, 6.8339, 6.9529, 7.0711, 7.1886, 7.1181,\n 7.0481, 6.9786, 6.9097, 6.8413, 6.7734, 6.8897, 7.0054, 7.1204,\n 7.2348, 7.3485, 7.4616, 7.3937, 7.3263, 7.2594, 7.1929, 7.1270,\n 7.0614, 7.1735, 7.2849, 7.3958, 7.5061, 7.6158, 7.7249, 7.6594,\n 7.5944, 7.5297, 7.4655, 7.4017, 7.3383, 7.4465, 7.5542, 7.6613,\n 7.7679, 7.8740, 7.9796, 7.9162, 7.8533, 7.7907, 7.8956, 7.8333,\n 7.7715, 7.8758, 7.9796, 8.0829, 8.1858, 8.2882, 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A slow lori walks around.\nSentence 2: A animal is walking around.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.7997, -1.6632, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.8145, -1.8511, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.3566, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.2998, 3.5032, 3.7033,\n 3.9001, 3.7905, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.0205, 3.9208, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.3333, 4.2385, 4.4083, 4.5760, 4.4820, 4.3894,\n 4.5547, 4.7181, 4.8797, 5.0395, 4.9472, 4.8561, 4.7662, 4.6775,\n 4.5899, 4.7469, 4.6603, 4.8154, 4.9691, 4.8830, 4.7980, 4.7140,\n 4.8655, 5.0156, 5.1643, 5.0807, 4.9980, 5.1450, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.1908, 5.1111, 5.0323, 5.1745, 5.0964, 5.2372,\n 5.3769, 5.2992, 5.2223, 5.1461, 5.2842, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.5407, 5.6743, 5.8069, 5.9386, 5.8635, 5.7892, 5.7155,\n 5.6424, 5.5701, 5.7001, 5.6282, 5.7572, 5.8853, 5.8138, 5.7429,\n 5.8698, 5.7994, 5.9254, 6.0506, 5.9805, 5.9109, 6.0351, 6.1584,\n 6.2810, 6.4028, 6.3333, 6.2644, 6.1961, 6.1283, 6.0609, 6.1815,\n 6.1146, 6.2342, 6.3532, 6.2866, 6.2205, 6.3385, 6.2728, 6.3901,\n 6.5067, 6.4413, 6.3762, 6.4920, 6.6072, 6.7217, 6.8355, 6.7706,\n 6.7061, 6.6421, 6.5785, 6.5153, 6.6282, 6.7404, 6.6775, 6.6150,\n 6.7264, 6.8373, 6.9477, 7.0574, 6.9950, 6.9330, 6.8713, 6.8101,\n 6.7492, 6.8580, 6.7974, 6.9056, 7.0133, 6.9530, 6.8930, 6.8333,\n 6.9403, 7.0467, 7.1527, 7.0932, 7.0340, 7.1393, 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A little boy is vacuuming the floor.\nSentence 2: A boy is vacuuming.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.3797, -0.4257, -0.2828, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.2261, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.5228, 3.4101, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.5777, 3.4743, 3.6662, 3.8552, 3.7528,\n 3.6522, 3.5533, 3.7383, 3.9208, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.0667, 3.9736, 4.1451, 4.0531, 4.2222, 4.3894,\n 4.2981, 4.2080, 4.3727, 4.2836, 4.4462, 4.6070, 4.5186, 4.6775,\n 4.8347, 4.7469, 4.6603, 4.5747, 4.4901, 4.6448, 4.5611, 4.7140,\n 4.8655, 4.7823, 4.7001, 4.6188, 4.5384, 4.6876, 4.8355, 4.7556,\n 4.9019, 5.0469, 4.9675, 4.8889, 4.8111, 4.7341, 4.8772, 4.8008,\n 4.9424, 5.0829, 5.0070, 4.9317, 4.8572, 4.7834, 4.9221, 5.0596,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406, 4.9695, 5.1031,\n 5.0325, 5.1650, 5.2965, 5.2262, 5.1564, 5.2868, 5.2175, 5.3468,\n 5.4752, 5.4062, 5.5336, 5.6602, 5.5915, 5.5233, 5.4557, 5.3886,\n 5.5138, 5.4471, 5.5714, 5.6949, 5.6285, 5.5626, 5.6851, 5.6195,\n 5.7411, 5.8621, 5.7967, 5.9168, 6.0362, 5.9711, 5.9065, 5.8424,\n 5.7787, 5.8969, 5.8336, 5.9510, 6.0678, 6.0047, 5.9420, 5.8797,\n 5.8179, 5.9336, 6.0487, 5.9871, 6.1014, 6.2152, 6.1537, 6.0927,\n 6.0321, 5.9718, 6.0846, 6.0246, 6.1367, 6.2482, 6.1884, 6.1290,\n 6.0700, 6.0113, 6.1219, 6.2319, 6.1734, 6.2828, 6.3917, 6.3333,\n 6.2753, 6.2177, 6.1604, 6.2684, 6.2113, 6.3187, 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is applying eye liner to her eyelid using an eye pencil.\nSentence 2: A woman is applying cosmetics to her eyelid.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.4313, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.6513, 0.7884, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.8909,\n 0.8438, 0.9742, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 1.0445,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 9.8792, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.1991, 11.3091, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.2503, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.2034, 12.3027, 12.4015, 12.3167, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 12.9891, 13.0821, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.4100, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is making a bed.\nSentence 2: A woman is playing a guitar.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -0.9409, -0.7809, -0.6222, -0.6713, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.4474, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.3020, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.2790, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.2591, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.3435, 10.4447,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man plays a keyboard.\nSentence 2: A person is playing the keyboard.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "85", + "Fraction of T in Greenlist": "42.7%", + "z-score": "5.77", + "p value": "3.95e-09", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 1.2910, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 2.0948, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.3333, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.4059, 1.3460, 1.5010, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.8370, 1.9863, 2.1344, 2.0739, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.9524, 1.8953, 2.0373, 1.9803,\n 2.1210, 2.0642, 2.2037, 2.1470, 2.2852, 2.4225, 2.3657, 2.5019,\n 2.6370, 2.5802, 2.7143, 2.8475, 2.9798, 3.1113, 3.2419, 3.1844,\n 3.3140, 3.4428, 3.5708, 3.5131, 3.4558, 3.3989, 3.5256, 3.6515,\n 3.5946, 3.5382, 3.6629, 3.7870, 3.9104, 4.0330, 4.1549, 4.0980,\n 4.2191, 4.1624, 4.2827, 4.4023, 4.5212, 4.4644, 4.5826, 4.7001,\n 4.6434, 4.7602, 4.7037, 4.6476, 4.7635, 4.7076, 4.8227, 4.7670,\n 4.8815, 4.9953, 5.1086, 5.2213, 5.3335, 5.4451, 5.3891, 5.5000,\n 5.4442, 5.5545, 5.6643, 5.6085, 5.5532, 5.6622, 5.7707])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A rabbit is playing with a toy rabbit.\nSentence 2: A bunny is playing with a stuffed bunny.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.9119, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.4185, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.7242, 10.8282, 10.7480, 10.8515, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.0235, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.6514, 11.7473,\n 11.8429, 11.9380, 11.8638, 11.9586, 12.0529, 12.1468, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.1141, 12.2068, 12.1347, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A plane rides on a road.\nSentence 2: An airplane moves along a runway.\nSimilarity score:", + "true_label": 2.812000036239624, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.2722, -0.1357, -0.1803, -0.2247, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.2487, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.9010, 5.1711,\n 5.4306, 5.6804, 5.4175, 5.1698, 5.4174, 5.6569, 5.8890, 6.1143, 5.8889,\n 5.6737, 5.8966, 5.6921, 5.4958, 5.7155, 5.5277, 5.7429, 5.9530, 5.7735,\n 5.6000, 5.8068, 5.6395, 5.8424, 5.6805, 5.5234, 5.7229, 5.9186, 6.1107,\n 5.9588, 5.8108, 5.6667, 5.5261, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997,\n 5.6830, 5.8635, 5.7354, 5.6099, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100,\n 5.1962, 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.3825, 5.2760, 5.1711,\n 5.0680, 4.9666, 4.8667, 5.0332, 5.1978, 5.3605, 5.2615, 5.1640, 5.0679,\n 5.2281, 5.1332, 5.2915, 5.1977, 5.1051, 5.2614, 5.4160, 5.5691, 5.4772,\n 5.3865, 5.2970, 5.2086, 5.3594, 5.5088, 5.4212, 5.3345, 5.4822, 5.6285,\n 5.7735, 5.6874, 5.6023, 5.5181, 5.4349, 5.5780, 5.7199, 5.6373, 5.5556,\n 5.6959, 5.8351, 5.9732, 5.8919, 5.8114, 5.7318, 5.8684, 5.7894, 5.9247,\n 5.8464, 5.7689, 5.9029, 6.0359, 6.1680, 6.0908, 6.0143, 5.9386, 6.0693,\n 6.1990, 6.1237, 6.0491, 5.9752, 6.1036, 6.2312, 6.3580, 6.2843, 6.2113,\n 6.1389, 6.0671, 6.1926, 6.1213, 6.0506, 5.9805, 6.1047, 6.2282, 6.3509,\n 6.2810, 6.2116, 6.1429, 6.0746, 6.1961, 6.3168, 6.2489, 6.1815, 6.3013,\n 6.4203, 6.5387, 6.4715, 6.4048, 6.3385, 6.2728, 6.2075, 6.3247, 6.2598,\n 6.1954, 6.3117, 6.4274, 6.5424, 6.4781, 6.4143, 6.3509, 6.2879, 6.4019,\n 6.5153, 6.4526, 6.3902, 6.5029, 6.6150, 6.7264, 6.6642, 6.6024, 6.5410,\n 6.6517, 6.5906, 6.7006, 6.6398, 6.5794, 6.6887, 6.7974, 6.9056, 6.8454,\n 6.7854, 6.7259, 6.6667, 6.6078, 6.7151, 6.6565, 6.5983, 6.7049, 6.8111,\n 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 1.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man shoots a man.\nSentence 2: A man with a pistol shoots another man.\nSimilarity score:", + "true_label": 4.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.3943, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.2243, 1.3663, 1.3128, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.2771, 1.4142, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.4792, 1.4284, 1.5617, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.8935, 1.8453, 1.7974, 1.9206, 2.0430,\n 1.9950, 2.1167, 2.2377, 2.3580, 2.3098, 2.2618, 2.3812, 2.3333,\n 2.2857, 2.4042, 2.3567, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.7791, 3.0000,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 2.9856, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 2.8830, 3.0796, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.4438, 3.3556, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.4429, 3.6122, 3.5277, 3.4442, 3.3619, 3.2806, 3.4466,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.4586, 3.6159, 3.7717, 3.9260, 3.8490, 4.0016, 3.9253, 3.8497,\n 3.7750, 3.7011, 3.6279, 3.7778, 3.7051, 3.8534, 4.0004, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.8596, 3.7897, 3.9331, 4.0753, 4.2164,\n 4.1464, 4.0771, 4.2167, 4.3552, 4.2861, 4.4234, 4.5596, 4.6949,\n 4.6258, 4.7599, 4.6912, 4.8242, 4.7559, 4.8878, 5.0187, 5.1488,\n 5.0806, 5.2096, 5.3378, 5.2699, 5.2025, 5.1357, 5.2626, 5.1962,\n 5.1303, 5.0649, 5.1905, 5.3153, 5.4393, 5.5626, 5.4971, 5.4322,\n 5.5544, 5.6760, 5.6112, 5.5470, 5.6675, 5.7874, 5.9065, 5.8424,\n 5.7787, 5.8969, 6.0145, 6.1314, 6.0678, 6.1839, 6.2994, 6.2361,\n 6.3509, 6.4650, 6.5785, 6.5153, 6.6282, 6.5653, 6.5029, 6.6150,\n 6.7264, 6.6642, 6.6024, 6.7132, 6.8233, 6.9330, 7.0420, 7.1506,\n 7.2585, 7.1967, 7.3041, 7.2425, 7.3493, 7.4556, 7.5614, 7.5000,\n 7.4390, 7.5441, 7.6488, 7.7530, 7.6922, 7.7958, 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man plays the guitar.\nSentence 2: The man sang and played his guitar.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.3616, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.1746, 13.0956, 13.0171, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.2429, 13.1667,\n 13.0910, 13.1815, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A guy is playing hackysack\nSentence 2: A man is playing a key-board.\nSimilarity score:", + "true_label": 1.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.2121, 8.0928, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 8.0495, 7.9455, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.2372, 8.1481, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.0923, 9.0117, 8.9319, 8.8527,\n 8.9660, 9.0786, 9.0000, 8.9221, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.9151, 8.8396, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.6111, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.7910, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: One man is breaking cement on another man's chest.\nSentence 2: A man breaks cinder blocks on another man.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.4233, 0.5489, 0.5053, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.4819, 11.4031,\n 11.3249, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.3091, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.4525, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is talking with other women on the beach.\nSentence 2: A man is walking down the street.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.2982, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.6681, 2.8943, 2.7791, 3.0000,\n 2.8868, 3.1027, 3.3147, 3.5228, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 4.1461, 4.0415,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.9056, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.6187, 3.5301, 3.4427, 3.6148,\n 3.7849, 3.6979, 3.8657, 4.0316, 4.1957, 4.1090, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.2507, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.7001, 4.6188, 4.5384, 4.6876, 4.8355, 4.7556,\n 4.9019, 5.0469, 5.1908, 5.1111, 5.0323, 4.9543, 4.8772, 4.8008,\n 4.9424, 5.0829, 5.0070, 5.1461, 5.2842, 5.4212, 5.3455, 5.2705,\n 5.1962, 5.1225, 5.2578, 5.1848, 5.1123, 5.2463, 5.3793, 5.3072,\n 5.2358, 5.1650, 5.0948, 5.0252, 4.9562, 5.0873, 5.0187, 5.1488,\n 5.0806, 5.2096, 5.3378, 5.4650, 5.3970, 5.3295, 5.2626, 5.1962,\n 5.1303, 5.2560, 5.3810, 5.3153, 5.4393, 5.5626, 5.6851, 5.6195,\n 5.5544, 5.6760, 5.6112, 5.7319, 5.6675, 5.7874, 5.7234, 5.8424,\n 5.9607, 6.0784, 6.0145, 5.9510, 5.8880, 6.0047, 5.9420, 6.0579,\n 6.1732, 6.1107, 6.2253, 6.3392, 6.4526, 6.3902, 6.3283, 6.4409,\n 6.3793, 6.4911, 6.4298, 6.5410, 6.4800, 6.5906, 6.7006, 6.8101,\n 6.7492, 6.6887, 6.6285, 6.5688, 6.5094, 6.6179, 6.7259, 6.6667,\n 6.7740, 6.8809, 6.9873, 6.9282, 6.8695, 6.8111, 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The woman is dicing onions.\nSentence 2: A woman is dancing in railway station.\nSimilarity score:", + "true_label": 0.4000000059604645, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.6496, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.3113, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 14.0502, 13.9700, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A chef is preparing some food.\nSentence 2: A chef prepared a meal.\nSimilarity score:", + "true_label": 4.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.3189, -0.3705, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.1391, -0.1849, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.0792, 2.9424, 2.8098, 3.0509, 2.9212,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.6571, 4.5461, 4.7281, 4.9075,\n 4.7980, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.6986, 5.8522, 6.0041, 6.1546,\n 6.0587, 5.9641, 5.8707, 5.7785, 5.9270, 6.0740, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.3768, 6.2883, 6.4283, 6.3408,\n 6.2541, 6.3928, 6.3070, 6.4444, 6.5807, 6.7159, 6.8500, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.6471, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.8757, 7.0043, 7.1319, 7.0513, 6.9714, 6.8922, 6.8138, 6.7361,\n 6.6591, 6.7854, 6.7090, 6.8343, 6.9587, 7.0823, 7.2051, 7.1291,\n 7.0537, 6.9789, 6.9048, 7.0265, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.4762, 7.4034, 7.3312, 7.2596, 7.1886, 7.1181,\n 7.2348, 7.1647, 7.2807, 7.3960, 7.5106, 7.6246, 7.7380, 7.8507,\n 7.9628, 8.0742, 8.0042, 8.1150, 8.2252, 8.3349, 8.4439, 8.3742,\n 8.3050, 8.4133, 8.3446, 8.4523, 8.5595, 8.6662, 8.5978, 8.5298,\n 8.4623, 8.5683, 8.5012, 8.4345, 8.3683, 8.3024, 8.4078, 8.5126,\n 8.4471, 8.5513, 8.6551, 8.7584, 8.8612, 8.7959, 8.7311, 8.6667,\n 8.7689, 8.7048, 8.6411, 8.7427, 8.6794, 8.7805, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man talked to a girl over the internet camera.\nSentence 2: A teenager talks to a girl over a webcam.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.1057, -2.1470, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.5325, -3.5648, -3.4316, -3.4641, -3.4964, -3.5286, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "65.2%", + "z-score": "13", + "p value": "3.27e-39", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.0004, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A turtle walks over the ground.\nSentence 2: A large turtle crawls in the grass.\nSimilarity score:", + "true_label": 3.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.2646, 0.4216,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.8208, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.7348, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.1200, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.0286, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.3586, 12.4625, 12.3655, 12.2694, 12.3729, 12.2778,\n 12.1836, 12.0902, 11.9977, 12.1012, 12.2040, 12.3063, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.4286, 12.5289, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.2791, 13.3728, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.5985, 13.6896, 13.6091, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.8113, 13.9007, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The lady cracked an egg for the mixer.\nSentence 2: The lady sliced up the meat.\nSimilarity score:", + "true_label": 1.1540000438690186, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.6630, -1.7028, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.0792, 2.9424, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.0123, 3.2348, 3.4528, 3.3333,\n 3.5466, 3.4293, 3.6380, 3.8431, 3.7273, 3.6141, 3.5032, 3.7033,\n 3.5942, 3.7905, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.5544, 4.7278, 4.6268,\n 4.5274, 4.6981, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.6476,\n 4.5547, 4.4630, 4.6262, 4.5356, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.5747, 4.4901, 4.6448, 4.7980, 4.7140,\n 4.6311, 4.7823, 4.7001, 4.6188, 4.7682, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.9675, 4.8889, 4.8111, 4.9543, 4.8772, 5.0190,\n 5.1597, 5.0829, 5.0070, 4.9317, 4.8572, 4.7834, 4.9221, 4.8488,\n 4.9862, 5.1225, 5.0496, 4.9774, 4.9058, 4.8348, 4.7645, 4.8990,\n 4.8291, 4.9624, 5.0948, 5.0252, 4.9562, 4.8878, 4.8200, 4.7527,\n 4.8833, 4.8164, 4.9460, 5.0747, 5.0080, 4.9419, 4.8763, 4.8113,\n 4.7467, 4.8737, 4.8095, 4.9356, 5.0609, 4.9969, 4.9333, 5.0576,\n 4.9943, 4.9316, 5.0548, 4.9923, 5.1146, 5.2362, 5.1739, 5.1121,\n 5.0507, 5.1711, 5.1100, 5.2297, 5.1689, 5.2877, 5.4059, 5.3452,\n 5.2850, 5.4023, 5.3423, 5.2827, 5.3991, 5.3398, 5.4554, 5.5705,\n 5.5113, 5.4526, 5.3941, 5.3361, 5.2784, 5.3923, 5.3349, 5.4480,\n 5.5606, 5.5033, 5.4464, 5.3898, 5.3335, 5.2776, 5.3891, 5.3333,\n 5.4442, 5.5545, 5.4989, 5.4436, 5.3886, 5.4981, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Five kittens are eating out of five dishes.\nSentence 2: Kittens are eating food on trays.\nSimilarity score:", + "true_label": 2.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.7178, 1.6499,\n 1.5828, 1.7496, 1.9149, 1.8475, 1.7809, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.7143, 1.8716, 1.8084, 1.7457,\n 1.9009, 2.0548, 1.9920, 1.9298, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.7179, 1.8665, 2.0140, 1.9545, 1.8956, 2.0412,\n 2.1858, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.2361, 2.1783,\n 2.1210, 2.2608, 2.3995, 2.3422, 2.2852, 2.4225, 2.5589, 2.6943,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.4122, 2.3570, 2.3022, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.5886, 2.7186, 2.8478, 2.7930, 2.7386,\n 2.8666, 2.8124, 2.7585, 2.8853, 2.8316, 2.7783, 2.7253, 2.6726,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.4065, 2.3580, 2.3098, 2.2618, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.3567, 2.4744, 2.4269, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 3.9337, 3.7905, 4.0166,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.5137, 6.4065, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.6973, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.9303, 7.0657, 6.9768, 7.1111, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.3322, 7.2459, 7.1605, 7.0759, 7.2058, 7.3346, 7.4625, 7.3786,\n 7.5056, 7.4225, 7.3402, 7.4661, 7.5910, 7.5094, 7.6335, 7.7567,\n 7.6758, 7.5955, 7.7178, 7.8393, 7.9600, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.1585, 8.0801, 8.0024, 8.1198, 8.0427, 8.1594, 8.0829,\n 8.0070, 7.9318, 7.8571, 7.9729, 8.0880, 8.0139, 7.9403, 8.0546,\n 8.1683, 8.0952, 8.0227, 7.9507, 7.8793, 7.9921, 7.9211, 7.8507,\n 7.7808, 7.7114, 7.6424, 7.7544, 7.6859, 7.6179, 7.7291, 7.8397,\n 7.7720, 7.8820, 7.8147, 7.7480, 7.6816, 7.6158, 7.5503, 7.4853,\n 7.5944, 7.7028, 7.6381, 7.7460, 7.6816, 7.6177, 7.7249, 7.6613,\n 7.5981, 7.7047, 7.8107, 7.7478, 7.8533, 7.7907, 7.7285, 7.6667,\n 7.7715, 7.8758, 7.8142, 7.7530, 7.6922, 7.7958, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is slicing some tuna.\nSentence 2: A woman is cutting raw fish.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.9%", + "z-score": "3.18", + "p value": "0.000727", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.8091, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.8962, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 1.8983, 1.8446, 1.9799, 2.1143, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 2.1125, 2.2406, 2.1892, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.3131, 2.4371,\n 2.5604, 2.5099, 2.6323, 2.7541, 2.7036, 2.8245, 2.9448, 2.8943,\n 2.8440, 2.9633, 3.0821, 3.0317, 2.9817, 2.9320, 3.0496, 3.1667,\n 3.1169, 3.0674, 3.1836])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man playing the guitar.\nSentence 2: A woman is painting her lips.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.2982, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.2209, 12.3143, 12.2403, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is playing a flute.\nSentence 2: A dog is barking at a fly.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.9742, 0.9272, 0.8805, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.8607, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.8333,\n 0.7896, 0.7461, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.7348, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.1200, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.0286, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.3586, 12.4625, 12.3655, 12.2694, 12.3729, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.0311, 13.1279, 13.0400, 13.1364, 13.2324, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.6416,\n 14.7293, 14.8167, 14.9037, 14.8219, 14.9086, 14.8274, 14.7468, 14.6667,\n 14.5871, 14.5080, 14.4294, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A baby rhino is walking around his pen with his mother.\nSentence 2: A baby rhino is following an adult rhino.\nSimilarity score:", + "true_label": 3.4000000953674316, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.2418, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -2.8150, -2.6296, -2.6737, -2.7175,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -2.7875, -2.8289, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.2348, -3.2717,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.2998, -3.1479, -3.1844,\n -3.2206, -3.0706, -3.1071, -3.1433, -3.1794, -3.2152, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.2811, -3.3160, -3.1720, -3.2071,\n -3.2420, -3.2768, -3.3113, -3.3457, -3.3799, -3.4140, -3.4478, -3.4816,\n -3.5151, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.5000,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.5627, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 3.0123, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.8759, 3.7700, 3.9595, 4.1461, 4.0415,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 4.0024, 4.1797, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 4.2222, 4.1312,\n 4.0415, 3.9530, 4.1192, 4.2836, 4.1957, 4.3580, 4.2710, 4.1851,\n 4.1003, 4.2601, 4.1761, 4.0931, 4.0112, 4.1684, 4.0872, 4.2426,\n 4.1621, 4.3158, 4.2359, 4.1569, 4.0788, 4.2303, 4.1528, 4.0762,\n 4.0004, 3.9254, 3.8512, 4.0000, 3.9263, 4.0736, 4.2196, 4.1461,\n 4.0734, 4.0015, 3.9302, 4.0740, 4.0032, 4.1457, 4.2870, 4.2164,\n 4.1464, 4.2862, 4.2167, 4.3552, 4.2861, 4.4234, 4.3547, 4.2866,\n 4.2191, 4.3548, 4.2877, 4.2212, 4.1552, 4.0898, 4.0249, 4.1586,\n 4.0941, 4.2267, 4.3583, 4.2940, 4.2301, 4.1667, 4.1038, 4.2339,\n 4.1713, 4.3004, 4.2381, 4.1763, 4.1150, 4.2426, 4.1816, 4.1210,\n 4.0608, 4.0011, 3.9418, 4.0678, 4.0087, 4.1338, 4.2582, 4.1992,\n 4.1406, 4.0825, 4.2056, 4.3280, 4.2699, 4.3915, 4.3336, 4.2762,\n 4.2191, 4.3395, 4.2827, 4.2262, 4.1700, 4.1143, 4.0589, 4.1779,\n 4.1226, 4.2409, 4.3585, 4.3033, 4.2485, 4.1940, 4.1399, 4.2563,\n 4.2023, 4.3180, 4.2642, 4.2108, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.1143, 4.0622, 4.0105, 4.1239, 4.0723, 4.1851, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The man used a sword to slice a plastic bottle.\nSentence 2: A man sliced a plastic bottle with a sword.\nSimilarity score:", + "true_label": 5.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.0947, 8.9544, 9.0990, 9.2418, 9.1051, 8.9709, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.4705, 9.6011,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.0047, 10.8995, 10.7955, 10.9109,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.0488, 10.9488, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.0883, 10.9917, 10.8960, 11.0070, 10.9123, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 11.8571, 11.9594, 12.0611, 11.9737, 11.8870, 11.8010,\n 11.9024, 12.0032, 11.9181, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.2182, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.8904, 13.8113, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.2546, 14.1764, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 2.5 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is cleaning a fish on a kitchen counter.\nSentence 2: A man is playing a flute.\nSimilarity score:", + "true_label": 0.800000011920929, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 1.2039, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 1.0050, 0.9512, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.3663, 1.3128, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.7085, 1.6554, 1.7913, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.6336, 1.7609, 1.7119, 1.8383, 1.9640, 1.9149,\n 1.8660, 1.9906, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.3710, 13.4691, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 14.0479, 13.9543, 14.0488,\n 14.1428, 14.2364, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.7874, 14.8773, 14.7877, 14.8773, 14.9666,\n 15.0555, 15.1440, 15.2321, 15.3198, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.4057, 15.4922, 15.5783, 15.6641, 15.7495, 15.8345, 15.9193, 15.8334,\n 15.9179, 16.0020, 16.0858, 16.0009, 16.0845, 16.1678, 16.2507, 16.3333,\n 16.4156, 16.4976, 16.4139, 16.4957, 16.5772, 16.6584, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A person is chopping coriander leaves.\nSentence 2: A woman is slicing up some green leaves.\nSimilarity score:", + "true_label": 2.25, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n 0.1925, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.0543, -0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.0886, 0.0442, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.3004, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.4949, 0.6170, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.0209, 11.9487, 11.8769, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man is pressing microwave buttons.\nSentence 2: A man turns on the microwave.\nSimilarity score:", + "true_label": 2.75, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.6131, -2.6533, -2.6933, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.5769, -2.6163, -2.6554, -2.5019,\n -2.5412, -2.5802, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.4351,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.3735,\n -2.4116, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.7894, -2.6534, -2.6888, -2.7240,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.7013, -2.7358, -2.7701, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "56.1%", + "z-score": "10.1", + "p value": "4.05e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.4832, 5.6401, 5.7955, 5.6986, 5.8522, 5.7566, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.6973, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.2443, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.8463, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 8.9612, 9.0773, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.1452, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.7072, 9.6322, 9.5577, 9.4837, 9.5896, 9.5161, 9.4432,\n 9.3708, 9.4761, 9.5808, 9.6850, 9.6130, 9.7167, 9.6452, 9.5743,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.5698, 9.5007, 9.4320, 9.5341,\n 9.4658, 9.3980, 9.3306, 9.4321, 9.5331, 9.4661, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.9641, 10.0624])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A girl is riding a horse.\nSentence 2: The girl trotted the horse.\nSimilarity score:", + "true_label": 4.5, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.0079, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.7807, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "56.1%", + "z-score": "10.1", + "p value": "2.95e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.6681, 2.5538, 2.7791, 3.0000,\n 3.2167, 3.1027, 2.9913, 3.2026, 3.4101, 3.6141, 3.8146, 4.0119,\n 3.9001, 3.7905, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.2601, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 6.8819, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.6867, 7.5967, 7.7268, 7.6376,\n 7.7667, 7.8948, 7.8065, 7.7192, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.9254, 7.8406, 7.7566, 7.6734, 7.7976, 7.9209, 7.8384, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.9660, 8.8874, 8.8095, 8.9221, 9.0340, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.4103, 9.5161, 9.4432,\n 9.3708, 9.4761, 9.5808, 9.5089, 9.4375, 9.5416, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.5369, 9.6394, 9.5698, 9.5007, 9.4320, 9.5341,\n 9.6356, 9.7367, 9.8373, 9.7690, 9.8691, 9.8012, 9.7337, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.2273, 10.1602, 10.0935])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: Batman and Robin fly a helicopter over water.\nSentence 2: A helicopter flies over water.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.4288, -0.4804, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.1357, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.1615, 9.2768, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.4608, 9.3810, 9.4916, 9.4124, 9.3338, 9.4438, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.5329, 11.6297, 11.7261, 11.8221, 11.9176,\n 12.0127, 11.9380, 11.8638, 11.7901, 11.8849, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman pours oil into a skillet from a plastic bottle while she is talking.\nSentence 2: An older woman is pouring oil into a skillet on the stove.\nSimilarity score:", + "true_label": 3.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.9545, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.3264, -2.3635, -2.4004, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.3835,\n -2.4195, -2.4553, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "63.1%", + "z-score": "12.4", + "p value": "1.46e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.2733, 8.3976, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.1615, 9.2768, 9.1927, 9.1094,\n 9.0267, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is cutting some herbs.\nSentence 2: A woman is chopping cilantro.\nSimilarity score:", + "true_label": 2.799999952316284, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.5547, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.1451, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.4182, 4.3339, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.6311, 4.5491, 4.4680, 4.3879, 4.5384, 4.6876, 4.8355, 4.9820,\n 4.9019, 4.8226, 4.7442, 4.6667, 4.5899, 4.7341, 4.6580, 4.8008,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.5448,\n 5.6725, 5.7994, 5.7295, 5.6602, 5.5915, 5.5233, 5.4557, 5.5811,\n 5.5138, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.6195,\n 5.7411, 5.8621, 5.9822, 5.9168, 5.8519, 5.7874, 5.7234, 5.6598,\n 5.7787, 5.7155, 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 6.1632, 6.1014, 6.0401, 5.9792, 5.9186,\n 5.8585, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The lady put the skewered shimp in the hot water.\nSentence 2: The lady fried the breaded meat in hot oil.\nSimilarity score:", + "true_label": 1.600000023841858, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.3288, -1.1825, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.9594, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.7199, 12.8160, 12.9116, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.7106, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is riding on a horse.\nSentence 2: A man is shooting off guns.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.0553, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 7.8168, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.6148, 9.7224, 9.6456, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.9184, 9.8433, 9.9481, 10.0523, 9.9778,\n 10.0814, 10.0074, 10.1106, 10.0371, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The boy is playing the piano.\nSentence 2: A band is playing on stage.\nSimilarity score:", + "true_label": 1.3329999446868896, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.7857, 1.0120, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.4003, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.9864, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 1.1547,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.4792, 1.4284, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 1.0555, 1.1790, 1.3019, 1.4241, 1.5457, 1.5000,\n 1.4546, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.6%", + "z-score": "11.9", + "p value": "6e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.0553, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.0000, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.1807, 11.2816, 11.3820, 11.3032, 11.2250,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.4356, 11.5329, 11.4581, 11.5549, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.7169, 11.6441, 11.5718, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.8988])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: The cook is kneading the flour.\nSentence 2: A woman is kneading tortilla dough.\nSimilarity score:", + "true_label": 2.5999999046325684, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.5942, 3.4873, 3.3824, 3.5777, 3.7700, 3.6662, 3.5642, 3.4641,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.5447, 3.7264, 3.9056, 4.0825,\n 3.9869, 4.1612, 4.3333, 4.5034, 4.4083, 4.5760, 4.4820, 4.3894,\n 4.5547, 4.7181, 4.6262, 4.7875, 4.9472, 4.8561, 4.7662, 4.9237,\n 4.8347, 4.9904, 5.1444, 5.0562, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.5690, 5.7155, 5.8606, 6.0044, 6.1470, 6.2883, 6.4283, 6.5672,\n 6.7049, 6.8414, 6.9768, 7.1111, 7.2443, 7.3765, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.5048, 7.6328, 7.5472, 7.6742, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 7.8791, 8.0006, 7.9196, 7.8393, 7.9600, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.2760, 8.3927, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.7284, 8.8396, 8.9502, 9.0601, 9.1694, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.1242, 9.2311, 9.1587, 9.2651,\n 9.1932, 9.2990, 9.4042, 9.5089, 9.4375, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.6394, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.7367, 9.6684, 9.7690, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.0312, 10.1295, 10.2273, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is riding a horse along a perimeter.\nSentence 2: A dog is riding a skateboard.\nSimilarity score:", + "true_label": 0.6000000238418579, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.7348, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.0077, 12.1164, 12.2244, 12.1200, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.0286, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.3586, 12.4625, 12.3655, 12.2694, 12.3729, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.2963, 12.3985, 12.5001, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.0311, 13.1279, 13.0400, 13.1364, 13.2324, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.6416,\n 14.7293, 14.8167, 14.9037, 14.8219, 14.9086, 14.8274, 14.7468, 14.6667,\n 14.5871, 14.5080, 14.4294, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A woman is putting her baby in a waste bin.\nSentence 2: The woman is poking holes in the potato.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.4471, -1.3197, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 5.9954, 6.1612, 6.3249, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.5311, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.4762, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 12.0341, 11.9504, 11.8673,\n 11.7849, 11.7031, 11.6219, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.4223, 12.5179, 12.4384, 12.3595,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.0127, 11.9380, 11.8638, 11.9586, 12.0529, 11.9792, 11.9060, 11.8333,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A cat is pouncing on a trampoline.\nSentence 2: A man is slicing a tomato.\nSimilarity score:", + "true_label": 0.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.7216, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 4.8177, 4.7002, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.7044, 9.6225,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.4367, 10.5388, 10.4638, 10.5654, 10.6665, 10.7671, 10.8673, 10.7928,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.7389, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 5.0 + }, + { + "prompt": "Rate the semantic similarity of the following sentences on a scale from 0 to 5, where 0 means no similarity and 5 means semantic equivalence:\nSentence 1: A man and woman are eating at a table.\nSentence 2: A couple are eating a meal and talking.\nSimilarity score:", + "true_label": 3.0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.6912, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.8540, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.8805, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 2.0647, 1.9711, 1.8791, 2.0870, 2.2916, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.5205, 2.7107, 2.6222, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.2717, 3.4429, 3.6122, 3.5277, 3.6947, 3.8600, 4.0234, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.5747, 4.7296, 4.6448, 4.7980, 4.7140,\n 4.6311, 4.7823, 4.9322, 5.0807, 5.2278, 5.3736, 5.5181, 5.6614,\n 5.5780, 5.7199, 5.8605, 5.7778, 5.6959, 5.8351, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.9237, 6.8439, 6.9714, 7.0980, 7.2236, 7.3485,\n 7.4724, 7.5955, 7.7178, 7.8393, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.5052, 8.6190, 8.5424, 8.6556, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.3212, 9.2450, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.5577, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.4765, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1.0, + "predicted_label_with_watermark": 0.2 + } + ], + "metrics": { + "pearson_corr_without_watermark": 0.01675197019312319, + "pearson_corr_with_watermark": -0.0999659022196252, + "spearman_corr_without_watermark": -0.041560976173932014, + "spearman_corr_with_watermark": -0.07632988977600694 + } + } + }, + "mnli": { + "train": { + "results": [ + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Conceptually cream skimming has two basic dimensions - product and geography.\nHypothesis: Product and geography are what make cream skimming work. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.4804, 2.3851, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.8093, 2.7217,\n 2.9057, 3.0873, 3.2667, 3.1789, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 3.1052, 3.0237, 2.9433, 2.8638, 3.0330, 3.2004,\n 3.1211, 3.0429, 3.2077, 3.1300, 3.0533, 3.2157, 3.1394, 3.2998,\n 3.2242, 3.1493, 3.3075, 3.2332, 3.1597, 3.0870, 3.0151, 3.1704,\n 3.3243, 3.4768, 3.4047, 3.5556, 3.7051, 3.8534, 3.7812, 3.7097,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.5762, 3.7205, 3.6519, 3.5839,\n 3.5166, 3.6590, 3.5920, 3.7330, 3.6664, 3.8061, 3.7399, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.8191, 3.7547, 3.6908, 3.6274, 3.7626,\n 3.6995, 3.8335, 3.9666, 4.0988, 4.0356, 4.1667, 4.2970, 4.4264,\n 4.3631, 4.3004, 4.4286, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.1872, 4.1273, 4.2527, 4.1931, 4.3176, 4.2582, 4.3818,\n 4.3226, 4.4454, 4.3865, 4.3280, 4.2699, 4.3915, 4.3336, 4.2762,\n 4.2191, 4.3395, 4.2827, 4.4023, 4.5212, 4.6395, 4.5826, 4.7001,\n 4.8170, 4.9333, 4.8763, 4.8197, 4.9351, 4.8787, 4.8227, 4.7670,\n 4.7117, 4.8260, 4.7709, 4.7161, 4.6616, 4.7749, 4.7206, 4.8333,\n 4.7792, 4.8913, 4.8374, 4.9487, 4.8950, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.3113, 7.4878, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.7277, 8.6164,\n 8.7515, 8.8853, 8.7758, 8.9086, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.4524, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.1761, 11.0940, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.4614, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 12.0893, 12.0114,\n 11.9340, 11.8571, 11.7808, 11.7050, 11.8014, 11.8973, 11.8221, 11.9176,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.4448, 12.5367, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: you know during the season and i guess at at your level uh you lose them to the next level if if they decide to recall the the parent team the Braves decide to call to recall a guy from triple A then a double A guy goes up to replace him and a single A guy goes up to replace him\nHypothesis: You lose the things to the following level if the people recall.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.8%", + "z-score": "0.574", + "p value": "0.283", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.3004, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.4949, 0.6170, 0.5744])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.4072, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 15.1690, 15.2619, 15.1556, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.7680, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.3700, 16.4561, 16.3575, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 16.9265,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.4770, 17.3854, 17.4660, 17.5464, 17.6264, 17.7061,\n 17.7856, 17.8647, 17.9435, 18.0221, 18.1003, 18.0107, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: One of our number will carry out your instructions minutely.\nHypothesis: A member of my team will execute your orders with immense precision.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.2%", + "z-score": "-0.574", + "p value": "0.717", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.4142, -1.4565, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.1375, -0.9979, -1.0401, -0.9017, -0.7641, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.5347, -0.5744])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.5156, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.6785, 12.5979, 12.6930, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.1746, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.3967, 13.3196, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: How do you know? All this is their information again.\nHypothesis: This information belongs to them.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.4629, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.4662, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.5363, 1.4881, 1.4402, 1.5667,\n 1.6925, 1.8175, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.4699, 1.5916, 1.7128, 1.6667,\n 1.6208, 1.7410, 1.6951, 1.8145, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 4.1265, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 4.8857, 4.7703, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 4.7278, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.2981, 5.1978, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.2281, 5.3867, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 6.0587, 5.9641, 5.8707, 5.7785, 5.9270, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.9488, 6.0927, 6.0044, 5.9172, 6.0596, 5.9732, 6.1143,\n 6.2541, 6.1685, 6.0838, 6.0000, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.6804, 6.8133, 6.9451, 7.0759, 7.2058, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.2134, 7.1319, 7.0513, 7.1779, 7.3037, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 8.0403, 8.1602, 8.2793, 8.1989, 8.1192,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.1694, 9.0944, 9.0200, 9.1287,\n 9.0548, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.7886, 9.7167, 9.8198, 9.9224,\n 9.8510, 9.7800, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.4312, 10.3628, 10.4603, 10.5573, 10.4893, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah i tell you what though if you go price some of those tennis shoes i can see why now you know they're getting up in the hundred dollar range\nHypothesis: The tennis shoes have a range of prices.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "31.0%", + "z-score": "1.93", + "p value": "0.0266", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 1.7628, 1.6330, 1.5076, 1.3862, 1.6590, 1.5396,\n 1.4237, 1.6859, 1.5717, 1.8257, 1.7132, 1.6036, 1.4968, 1.3926, 1.2910,\n 1.5323, 1.4317, 1.3333, 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142,\n 1.6348, 1.5430, 1.4530, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275,\n 1.7321, 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 1.1043, 1.0328, 0.9623,\n 0.8926, 0.8238, 1.0079, 1.1898, 1.3697, 1.2999, 1.2309, 1.1628, 1.3389,\n 1.2710, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785, 1.1138, 1.2831, 1.2185,\n 1.3856, 1.3213, 1.2577, 1.1946, 1.3587, 1.2959, 1.2337, 1.1721, 1.1111,\n 1.0507, 0.9909, 1.1508, 1.0911, 1.0319, 1.1896, 1.3460, 1.2865, 1.2276,\n 1.3819, 1.3231, 1.2649, 1.2072, 1.3590, 1.3014, 1.4517, 1.6008, 1.5430,\n 1.6906, 1.6330, 1.5758, 1.5191, 1.6646, 1.6081, 1.5519, 1.6958, 1.6398,\n 1.7823, 1.7264, 1.6710, 1.8119, 1.7566, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.8446, 1.9799, 1.9263, 1.8732, 2.0071,\n 2.1401, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 2.0083, 1.9566, 1.9052,\n 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036, 1.5544, 1.6827, 1.6336,\n 1.7609, 1.7119, 1.6632, 1.6148, 1.5667, 1.6925, 1.8175, 1.9419, 1.8935,\n 1.8453, 1.7974, 1.9206, 1.8728, 1.8252, 1.7780, 1.7310, 1.8527, 1.8058,\n 1.7592, 1.8799, 1.8333, 1.9533, 1.9068, 1.8605, 1.8145, 1.9333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 7.9472, 7.8296, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 8.2178, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.7447, 9.6490, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 9.8877, 9.7980,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.5620, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.4614, 11.3820, 11.4819, 11.5813,\n 11.5026, 11.4244, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.4356, 11.3608, 11.2864, 11.3837, 11.3099, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: my walkman broke so i'm upset now i just have to turn the stereo up real loud\nHypothesis: I'm upset that my walkman broke and now I have to turn the stereo up really loud.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "37.5%", + "z-score": "1.15", + "p value": "0.124", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.4790, 7.3467, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.4932, 10.6145, 10.5027, 10.3923, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.7835, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.0488, 11.1614, 11.0615, 10.9626,\n 10.8647, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 11.1172, 11.0227,\n 10.9291, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 11.9594, 12.0611, 11.9737, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.3027, 12.2178, 12.1335, 12.2325,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.7559, 13.8447, 13.9332, 13.8564, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But a few Christian mosaics survive above the apse is the Virgin with the infant Jesus, with the Archangel Gabriel to the right (his companion Michael, to the left, has vanished save for a few feathers from his wings).\nHypothesis: Most of the Christian mosaics were destroyed by Muslims. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.4237, -0.4815, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.5941,\n -0.4439, -0.4915, -0.3428, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.4747, -0.3443, -0.3862, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.1493, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.0863, 8.9815, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.5397, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 10.9048, 11.0102, 11.1151, 11.0309, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.3610, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.4244, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: (Read for Slate 's take on Jackson's findings.)\nHypothesis: Slate had an opinion on Jackson's findings.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.7%", + "z-score": "-0.0821", + "p value": "0.533", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.6167, 0.8165,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.9733, 0.9152, 1.0721, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, 0.0847, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "65.2%", + "z-score": "13", + "p value": "3.27e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.3333,\n 8.4953, 8.3283, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.5321, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.3358, 10.2000, 10.3314, 10.4614, 10.5903, 10.7179, 10.8444,\n 10.7131, 10.5838, 10.4565, 10.3310, 10.4579, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.5027, 10.6232, 10.7429, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.2615, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.1836, 12.0902, 12.1936, 12.1012, 12.2040, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.5289, 12.6287, 12.7279, 12.8267, 12.9249,\n 13.0226, 12.9337, 12.8456, 12.7581, 12.6713, 12.7690, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.2791, 13.1957, 13.2895, 13.2068, 13.3002, 13.2182, 13.3113, 13.2299,\n 13.3227, 13.2419, 13.1617, 13.0821, 13.0030, 12.9244, 12.8464, 12.9391,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.7248, 12.6494, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.8667, 12.9574, 13.0477])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Gays and lesbians.\nHypothesis: Heterosexuals.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "7.96e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.1905, 9.1119, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.3113,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.3040, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.5089, 9.4375, 9.3665, 9.4707, 9.5743,\n 9.5038, 9.4338, 9.3642, 9.4673, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.5673, 9.6684, 9.7690, 9.7011, 9.6336, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.9641, 9.8974, 9.9957])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: At the end of Rue des Francs-Bourgeois is what many consider to be the city's most handsome residential square, the Place des Vosges, with its stone and red brick facades.\nHypothesis: Place des Vosges is constructed entirely of gray marble.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.3245, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -0.9285, -0.7385,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.4619, -0.2872, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.0543, 0.1081, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.7807, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.9366,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.2136, 1.3443, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.4713, 1.5967, 1.7213, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.4699, 1.5916, 1.7128, 1.8333,\n 1.9533, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.9%", + "z-score": "12.3", + "p value": "4.57e-35", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.0632, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.7987, 10.9048, 11.0102, 10.9259, 11.0309, 10.9473, 11.0517,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.1154, 11.0346, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.7200, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.8014, 11.7261, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.1468, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.3718, 12.2992])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: I burst through a set of cabin doors, and fell to the ground-\nHypothesis: I burst through the doors and fell down.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.6790, 4.8712, 5.0602, 4.9377,\n 4.8177, 4.7002, 4.5850, 4.7703, 4.6571, 4.5461, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.3026, 4.2008, 4.1008, 4.2784, 4.4537, 4.3546,\n 4.2571, 4.4296, 4.3333, 4.2385, 4.1451, 4.0531, 3.9624, 3.8730,\n 4.0415, 3.9530, 3.8657, 3.7796, 3.6947, 3.8600, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.0022, 3.1597, 3.0870, 3.0151, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.8889, 3.0415, 2.9726, 2.9044, 3.0551,\n 3.2044, 3.1363, 3.2841, 3.2163, 3.1492, 3.0827, 3.0168, 3.1623,\n 3.0967, 3.2408, 3.1755, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.2025, 3.1395, 3.2788, 3.2161, 3.1539, 3.0923, 3.0311, 3.1685,\n 3.1076, 3.0471, 3.1831, 3.3181, 3.2577, 3.3915, 3.3314, 3.2717,\n 3.2124, 3.1536, 3.0952, 3.2271, 3.1690, 3.2998, 3.2419, 3.3717,\n 3.3140, 3.2567, 3.1998, 3.1433, 3.2715, 3.2152, 3.3424, 3.2863,\n 3.2306, 3.1753, 3.1203, 3.2460, 3.1912, 3.1368, 3.0827, 3.0290,\n 3.1532, 3.2768, 3.2230, 3.1696, 3.1166, 3.2389, 3.1860, 3.3075,\n 3.2547, 3.2023, 3.1502, 3.2705, 3.2186, 3.3381, 3.2863, 3.2348,\n 3.1836, 3.1327, 3.2509, 3.3686, 3.4857, 3.4346, 3.3838, 3.5000,\n 3.4494, 3.3990, 3.3489, 3.2991, 3.2496, 3.2004, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "64.6%", + "z-score": "12.7", + "p value": "4.52e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 3.9620, 3.8297, 4.0451, 3.9158, 4.1265, 4.0000,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.0034, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.2981, 5.4610, 5.6220, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.0741,\n 8.2012, 8.1111, 8.0219, 7.9336, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.3453, 8.2588, 8.3813, 8.5030, 8.6238, 8.5381, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.5234, 11.6220, 11.7200, 11.6425, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.4638, 12.5568, 12.6494, 12.7416, 12.6667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Fun for adults and children.\nHypothesis: Fun for only children.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "46.0%", + "z-score": "6.81", + "p value": "4.84e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 2.9424, 3.1844, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.7852, 2.6681, 2.5538, 2.4422, 2.6667,\n 2.8868, 3.1027, 2.9913, 2.8823, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.5942, 3.4873, 3.6831, 3.8759, 3.7700, 3.6662, 3.5642, 3.4641,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.8228, 3.7264, 3.6315, 3.8103,\n 3.7166, 3.8927, 4.0667, 4.2385, 4.1451, 4.0531, 4.2222, 4.1312,\n 4.2981, 4.4630, 4.6262, 4.5356, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.8347, 4.9904, 4.9023, 4.8154, 4.9691, 4.8830, 4.7980, 4.9497,\n 4.8655, 5.0156, 5.1643, 5.0807, 4.9980, 4.9163, 4.8355, 4.7556,\n 4.9019, 4.8226, 4.9675, 5.1111, 5.2535, 5.1745, 5.3156, 5.4554,\n 5.3769, 5.2992, 5.2223, 5.1461, 5.0707, 5.2086, 5.1338, 5.0596,\n 5.1962, 5.1225, 5.0496, 5.1848, 5.1123, 5.2463, 5.3793, 5.5114,\n 5.4391, 5.3675, 5.4983, 5.4272, 5.3567, 5.4863, 5.4163, 5.3468,\n 5.2779, 5.4062, 5.3378, 5.4650, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.5138, 5.6383, 5.5714, 5.5051, 5.6285, 5.5626, 5.6851, 5.8068,\n 5.9279, 5.8621, 5.9822, 6.1017, 6.0362, 5.9711, 5.9065, 5.8424,\n 5.9607, 6.0784, 6.0145, 5.9510, 6.0678, 6.0047, 5.9420, 6.0579,\n 5.9956, 6.1107, 6.2253, 6.3392, 6.2770, 6.2152, 6.3283, 6.2668,\n 6.3793, 6.4911, 6.6024, 6.5410, 6.6517, 6.7618, 6.7006, 6.6398,\n 6.5794, 6.5193, 6.4597, 6.5688, 6.5094, 6.4504, 6.5588, 6.5000,\n 6.4416, 6.5493, 6.4912, 6.5983, 6.7049, 6.8111])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165, 0.5774,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.1822, 2.0370, 2.3190, 2.5924, 2.8577, 3.1156, 3.3665, 3.2205, 3.4641,\n 3.3221, 3.5590, 3.7905, 3.6515, 3.8772, 3.7417, 3.6098, 3.8297, 4.0451,\n 3.9158, 3.7897, 4.0000, 3.8765, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284,\n 4.1260, 4.0119, 3.9001, 3.7905, 3.9837, 4.1740, 4.0657, 3.9595, 4.1461,\n 4.0415, 3.9386, 4.1219, 4.3026, 4.4809, 4.3788, 4.2784, 4.4537, 4.3546,\n 4.2571, 4.4296, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.9058, 5.0679,\n 4.9731, 4.8797, 5.0395, 4.9472, 5.1051, 5.2614, 5.4160, 5.5691, 5.7207,\n 5.8707, 6.0193, 6.1664, 6.0740, 6.2197, 6.3640, 6.5069, 6.6486, 6.7890,\n 6.6973, 6.6066, 6.7456, 6.8834, 7.0201, 6.9303, 6.8414, 6.9768, 6.8889,\n 6.8019, 6.9361, 6.8500, 6.7648, 6.8977, 7.0296, 7.1605, 7.2904, 7.2058,\n 7.1220, 7.2508, 7.3786, 7.2956, 7.2134, 7.3402, 7.2587, 7.3845, 7.5094,\n 7.6335, 7.7567, 7.6758, 7.5955, 7.7178, 7.8393, 7.7597, 7.8803, 7.8014,\n 7.7232, 7.8429, 7.9619, 7.8842, 7.8072, 7.9253, 7.8489, 7.7732, 7.8905,\n 7.8153, 7.7407, 7.8571, 7.7831, 7.7096, 7.8253, 7.7524, 7.6800, 7.7949,\n 7.9091, 8.0227, 8.1356, 8.0636, 7.9921, 8.1043, 8.2158, 8.1448, 8.0742,\n 8.1851, 8.1150, 8.2252, 8.3349, 8.4439, 8.5524, 8.4826, 8.4133, 8.3446,\n 8.4523, 8.5595, 8.4911, 8.5978, 8.5298, 8.4623, 8.3952, 8.5012, 8.6066,\n 8.5399, 8.4736, 8.5785, 8.5126, 8.4471, 8.3820, 8.4862, 8.4215, 8.3572,\n 8.4608, 8.3969, 8.3333, 8.4364, 8.3732, 8.3103, 8.4128, 8.5148, 8.6164,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: It's not that the questions they asked weren't interesting or legitimate (though most did fall under the category of already asked and answered).\nHypothesis: All of the questions were interesting according to a focus group consulted on the subject.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.4540, 0.4020, 0.3504, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.5547, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 5.3333, 5.6045, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 5.9214, 6.1546, 5.8989, 6.1283,\n 5.8890, 6.1143, 5.8889, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.7066, 10.8215, 10.9355, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.2966, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.3985, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.6287, 12.7279, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.2166, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.4230, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.0096, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Thebes held onto power until the 12th Dynasty, when its first king, Amenemhet Iwho reigned between 1980 1951 b.c. established a capital near Memphis.\nHypothesis: The capital near Memphis lasted only half a century before its inhabitants abandoned it for the next capital. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.9169, 1.1202, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.0439, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 9.9146, 10.0385, 9.9304, 9.8237, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.0096, 11.9187, 11.8287, 11.9319,\n 12.0345, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.7612, 13.8522, 13.7706, 13.8613, 13.7803, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.7215, 13.8113, 13.9007, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: I don't mean to be glib about your concerns, but if I were you, I might be more concerned about the near-term rate implications of this $1.\nHypothesis: I am concerned more about your issues than the near-term rate implications.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.6186, 1.5556, 1.4931, 1.4313, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.5681, 1.5097, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.6081, 1.7522, 1.6958, 1.6398, 1.7823,\n 1.7264, 1.8676, 1.8119, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.8446, 1.7913, 1.9263, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 2.0604, 2.0083,\n 1.9566, 1.9052, 2.0350, 2.1640, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 1.9868, 1.9370, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.8453, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.9473, 2.0688, 2.0212, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 2.0726, 2.0259, 1.9795, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.3423, 8.1689, 8.3333,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.4857, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.9392, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.0673, 10.2000, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.7099, 10.8350, 10.7084, 10.8328, 10.9560, 11.0782,\n 10.9546, 11.0762, 11.1967, 11.0755, 10.9559, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.5470, 11.6620, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.2248, 12.3350, 12.2222, 12.3319, 12.4409, 12.3299, 12.4384,\n 12.5462, 12.4370, 12.3289, 12.4365, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.7542, 12.8586, 12.7542, 12.8582, 12.9616, 12.8586, 12.9616, 12.8598,\n 12.9624, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.5647, 13.6626, 13.7599, 13.8567, 13.7599, 13.8564,\n 13.9524, 13.8567, 13.9524, 14.0475, 13.9530, 13.8593, 13.7663, 13.8615,\n 13.7694, 13.8642, 13.9585, 13.8675, 13.9615, 13.8713, 13.7818, 13.8756,\n 13.9690, 13.8804, 13.9735, 14.0660, 13.9784, 13.8914, 13.8051, 13.7194,\n 13.6343, 13.7270, 13.8193, 13.7350, 13.8270, 13.9185, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.0248, 13.9427, 14.0329, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.2282, 14.3166, 14.4046, 14.3248, 14.2455, 14.3333,\n 14.2546, 14.3422, 14.2640, 14.3513, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Issues in Data Synthesis.\nHypothesis: Problems in data synthesis.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 3.0792, 3.3221, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 2.8006, 2.9887, 3.1743, 3.0833, 3.2660,\n 3.1760, 3.3558, 3.2667, 3.1789, 3.3556, 3.2686, 3.4427, 3.6148,\n 3.7849, 3.6979, 3.6122, 3.5277, 3.6947, 3.8600, 4.0234, 4.1851,\n 4.1003, 4.0166, 3.9340, 4.0931, 4.0112, 3.9302, 3.8503, 4.0069,\n 4.1621, 4.3158, 4.2359, 4.1569, 4.0788, 4.0016, 4.1528, 4.3027,\n 4.4511, 4.5983, 4.5210, 4.4444, 4.3687, 4.5140, 4.4388, 4.3644,\n 4.5079, 4.6503, 4.7916, 4.9317, 4.8572, 4.7834, 4.7104, 4.6380,\n 4.7763, 4.9135, 5.0496, 5.1848, 5.1123, 5.0406, 4.9695, 5.1031,\n 5.0325, 4.9624, 4.8930, 5.0252, 5.1564, 5.2868, 5.2175, 5.1488,\n 5.0806, 5.0130, 5.1419, 5.2699, 5.3970, 5.5233, 5.4557, 5.3886,\n 5.3220, 5.4471, 5.3810, 5.3153, 5.2501, 5.3740, 5.4971, 5.6195,\n 5.5544, 5.4899, 5.4257, 5.3621, 5.4832, 5.6036, 5.7234, 5.8424,\n 5.7787, 5.7155, 5.6527, 5.7707, 5.7082, 5.6462, 5.7633, 5.8797,\n 5.9956, 6.1107, 6.0487, 5.9871, 5.9258, 5.8650, 5.9792, 6.0927,\n 6.2057, 6.3180, 6.2572, 6.1968, 6.1367, 6.2482, 6.1884, 6.1290,\n 6.0700, 6.1807, 6.2908, 6.4004, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.1091, 6.0519, 5.9950, 5.9385, 5.8822, 5.9905, 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well you see that on television also\nHypothesis: You can see that on television, as well.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.5119, 1.4403, 1.6187, 1.5475, 1.7233,\n 1.6524, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.4410, 1.5842,\n 1.7264, 1.6710, 1.6160, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.9540, 1.9013, 2.0339, 1.9813, 1.9291, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.3303, 1.4551, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.5298, 1.4846, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.4885, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.1084, 9.2582,\n 9.1002, 9.2488, 9.0949, 8.9443, 8.7967, 8.9455, 8.8015, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.9570, 9.8367, 9.7181, 9.6011,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.1125, 12.0218, 11.9319,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.0611, 11.9737, 12.0749, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.6785, 12.7735, 12.8680, 12.9621, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.0956, 13.0171, 13.1094,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Vrenna and I both fought him and he nearly took us.\nHypothesis: Neither Vrenna nor myself have ever fought him.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 3.9337, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.9620, 4.1779, 4.0451, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.1633, 4.3618, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552, 3.7528,\n 3.9386, 4.1219, 4.3026, 4.4809, 4.3788, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.8667, 5.0332, 4.9346, 5.0990, 5.0017, 5.1640,\n 5.3245, 5.4832, 5.3867, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.6875, 5.5976, 5.5088, 5.6569,\n 5.8035, 5.7155, 5.6285, 5.7735, 5.6874, 5.6023, 5.5181, 5.6614,\n 5.5780, 5.4956, 5.4140, 5.3333, 5.4747, 5.6149, 5.5348, 5.4554,\n 5.3769, 5.5155, 5.4377, 5.3606, 5.2842, 5.2086, 5.3455, 5.4813,\n 5.4061, 5.3316, 5.2578, 5.1848, 5.1123, 5.0406, 4.9695, 4.8990,\n 5.0325, 4.9624, 4.8930, 5.0252, 4.9562, 4.8878, 5.0187, 5.1488,\n 5.2779, 5.2096, 5.1419, 5.0747, 5.0080, 4.9419, 5.0694, 5.0037,\n 5.1303, 5.0649, 5.1905, 5.1255, 5.0609, 5.1854, 5.3092, 5.4322,\n 5.3677, 5.4899, 5.4257, 5.5470, 5.4832, 5.4199, 5.3571, 5.2947,\n 5.4147, 5.5340, 5.4718, 5.4100, 5.3487, 5.4670, 5.4059, 5.3452,\n 5.2850, 5.2251, 5.1657, 5.2827, 5.2235, 5.1647, 5.1063, 5.0483,\n 4.9906, 5.1064, 5.2215, 5.1640, 5.1068, 5.2211, 5.1642, 5.1075,\n 5.0513, 5.1647, 5.1086, 5.0529, 4.9975, 4.9425, 5.0548, 5.1667,\n 5.1117, 5.0571, 5.0027, 5.1137, 5.0595, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "160", + "Fraction of T in Greenlist": "80.4%", + "z-score": "18", + "p value": "4.02e-73", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 11.7978, 11.9138, 12.0289, 11.9062, 12.0208,\n 12.1346, 12.2474, 12.3595, 12.2398, 12.1216, 12.0049, 12.1171, 12.2286,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.5531, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 12.9704, 13.0748, 13.1785, 13.2816,\n 13.1741, 13.2768, 13.3789, 13.2730, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.3690, 13.4694, 13.5693, 13.6685, 13.7672, 13.8654, 13.7642, 13.6640,\n 13.7621, 13.8595, 13.7606, 13.6626, 13.7599, 13.8567, 13.9531, 13.8564,\n 13.9524, 14.0479, 14.1429, 14.2374, 14.3314, 14.4250, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.9769, 15.0674, 15.1574, 15.2470, 15.3362,\n 15.4250, 15.5134, 15.6014, 15.6891, 15.7763, 15.8631, 15.9496, 16.0357,\n 16.1214, 16.2068, 16.2917, 16.3764, 16.4607, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 17.0411, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.6058, 17.6852, 17.7643, 17.8432, 17.7546, 17.6667,\n 17.7455, 17.8241, 17.9023, 17.9803, 17.8935, 17.9714, 18.0489])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: This analysis pooled estimates from these two studies to develop a C-R function linking PM to chronic bronchitis.\nHypothesis: The analysis proves that there is no link between PM and bronchitis.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -1.9149,\n -1.9528, -1.8175, -1.8556, -1.8935, -1.7595, -1.7974, -1.8352, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: He turned and smiled at Vrenna.\nHypothesis: He smiled at Vrenna who was walking slowly behind him with her mother.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -0.9901, -1.0735, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "173", + "Fraction of T in Greenlist": "86.9%", + "z-score": "20.2", + "p value": "7.77e-91", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.5940, 13.6950, 13.7953, 13.8952, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.7685, 14.8629, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.9591, 16.0476, 16.1357, 16.2233, 16.3106, 16.3975, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.9105, 16.9947, 17.0785, 17.1620,\n 17.2451, 17.3279, 17.4103, 17.4924, 17.5741, 17.6556, 17.7367, 17.8174,\n 17.8979, 17.9780, 18.0578, 18.1373, 18.2165, 18.2954, 18.3739, 18.4522,\n 18.5302, 18.6079, 18.6853, 18.7625, 18.8393, 18.9159, 18.9921, 19.0681,\n 19.1439, 19.2194, 19.2946, 19.3695, 19.4442, 19.5186, 19.5928, 19.6667,\n 19.7403, 19.8137, 19.8869, 19.9598, 20.0325, 20.1049, 20.1771])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: We sought to identify practices that were commonly implemented by the agencies within the past 5 years.\nHypothesis: We want to identify practices commonly used by agencies in the last 5 years\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.0401, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.7462, -0.6128, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "68.5%", + "z-score": "14.1", + "p value": "1.67e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.2600, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.1024, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 10.8801, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.6059, 11.5156, 11.6206, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.2891, 12.2034, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 13.0067,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.5526, 13.4744, 13.5647, 13.6546, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.9332, 14.0214, 14.1091])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The other men shuffled.\nHypothesis: The other men were shuffled around.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.3736, -2.4228, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.7272, -2.7713, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -2.8368,\n -2.8786, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.6992, -2.7406,\n -2.5718, -2.4045, -2.4467, -2.4887, -2.5303, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.5769, -2.6163, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.6984, -2.7361, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.5936, -2.6309, -2.4872, -2.3443, -2.3822, -2.4198, -2.4572, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.7358, -2.7701, -2.6393, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.2532, 7.1591, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.7439, 8.8631, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.2697, 9.3810, 9.3017, 9.2232, 9.3338, 9.2559, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.4619, 9.3863, 9.3113,\n 9.2368, 9.3443, 9.2704, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.6850, 9.7886, 9.8918, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.3730, 10.3038, 10.2350, 10.1667,\n 10.2650, 10.3628, 10.4603, 10.5573, 10.4893, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: States must show reasonable progress in their state implementation plans toward the congressionally mandated goal of returning to natural conditions in national parks and wilderness areas.\nHypothesis: Itis not necessary for there to be any improvement.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.9979, 0.9497, 1.0820, 1.0338, 0.9858, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.3485, 7.5186, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.5366, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.2202, 10.3459, 10.2283, 10.1124, 10.2375, 10.3615, 10.4846, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.7518, 10.6434,\n 10.7616, 10.6547, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.6894, 11.5866, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.2541, 12.1568, 12.2615, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.8017, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.2166, 13.3128, 13.2243, 13.1364, 13.2324, 13.1453,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.3897, 14.4780, 14.3970, 14.3166, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well it's been very interesting\nHypothesis: It has been very intriguing.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 1.0915, 1.2577, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.6186, 1.7778, 1.7143, 1.6514, 1.5892, 1.7457,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.8682, 1.8071, 1.9582, 1.8974,\n 1.8370, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.6081, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.7085, 1.8446, 1.7913, 1.9263, 2.0605,\n 2.1938, 2.3262, 2.2723, 2.2188, 2.1656, 2.2966, 2.4267, 2.3735,\n 2.3206, 2.2680, 2.3967, 2.3443, 2.4721, 2.4198, 2.3679, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 2.1637, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.9738, 2.0943, 2.0470, 2.0000,\n 2.1195, 2.2384, 2.3567, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 6.5672, 6.7778, 6.9830, 6.7543, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.1317, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.4868,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.5938,\n 9.7091, 9.6210, 9.7356, 9.8494, 9.9625, 9.8753, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.3544, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.5366, 10.6397, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.7159, 10.8170, 10.7415, 10.8421, 10.9422, 11.0418, 10.9669,\n 10.8925, 10.9917, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.2376, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: He started slowly back to the bunkhouse.\nHypothesis: He returned slowly to the bunkhouse.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.2722, -0.3166, -0.1803, -0.0449, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.3522, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.0379, 6.9378, 6.8391, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.0219, 8.1481, 8.0598, 7.9724, 7.8859, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.5249, 8.4423, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.9783, 9.0923, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.9940, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: and it's it's quite a bit i think six something is the state and and uh the rest of the pie goes elsewhere but we're in a particular part of the state that's pretty well off so it's it's like we get a lot of that back as far as local taxation goes\nHypothesis: I do not know exactly where the local taxes go.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 1.1926, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.5097, 1.4517, 1.3943, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.6554, 1.6028, 1.5505, 1.6859,\n 1.8204, 1.7679, 1.9013, 1.8490, 1.7970, 1.9291, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.7028, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.6925, 1.8175, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.7870, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.5831, 12.6785, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.1233, 13.2149, 13.3060, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: They're made from a secret recipe handed down to the present-day villagers by their Mallorcan ancestors, who came here in the early 17th century as part of an official repopulation scheme.\nHypothesis: The recipe passed down from Mallorcan ancestors is known to everyone.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.2857, 11.3899, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.5471, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.6029, 11.5217, 11.4411, 11.5414, 11.6412, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.7000, 11.6220, 11.7200, 11.8176, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.8014, 11.8973, 11.8221, 11.9176,\n 12.0127, 11.9380, 11.8638, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.9273, 11.8551, 11.7833, 11.8769, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah well you're a student right\nHypothesis: Well you're a mechanics student right?\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.3235, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.8431, 4.0446, 3.9284, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.9837, 3.8759, 3.7700, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.6715, 4.5760, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.3727, 4.2836, 4.1957, 4.1090, 4.0234, 3.9389,\n 3.8555, 4.0166, 4.1761, 4.3339, 4.2507, 4.1684, 4.3241, 4.2426,\n 4.1621, 4.0825, 4.0038, 3.9260, 4.0788, 4.2303, 4.1528, 4.3027,\n 4.4511, 4.5983, 4.5210, 4.4444, 4.5899, 4.5140, 4.4388, 4.3644,\n 4.2907, 4.2178, 4.3609, 4.5029, 4.6437, 4.5708, 4.7104, 4.8488,\n 4.7763, 4.7044, 4.6332, 4.5626, 4.4927, 4.4234, 4.3547, 4.2866,\n 4.2191, 4.3548, 4.4895, 4.6232, 4.5557, 4.4888, 4.6212, 4.5547,\n 4.4887, 4.4233, 4.3583, 4.2940, 4.4246, 4.5543, 4.6832, 4.6188,\n 4.7467, 4.8737, 4.8095, 4.7458, 4.6825, 4.6198, 4.5575, 4.6829,\n 4.8076, 4.7455, 4.8693, 4.9923, 5.1146, 5.0525, 4.9908, 5.1121,\n 5.0507, 4.9897, 4.9292, 4.8690, 4.8093, 4.7500, 4.6911, 4.6325,\n 4.5744, 4.6938, 4.8125, 4.9305, 4.8724, 4.8146, 4.9317, 4.8742,\n 4.8170, 4.7602, 4.7037, 4.6476, 4.7635, 4.8787, 4.8227, 4.9373,\n 5.0513, 5.1647, 5.1086, 5.2213, 5.3335, 5.2776, 5.2220, 5.1667,\n 5.1117, 5.0571, 5.0027, 4.9487, 4.8950, 4.8416, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "66.5%", + "z-score": "13.5", + "p value": "1.52e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.7586, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.5623, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.7766, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.5188, 12.6190, 12.7187, 12.6287, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.5615, 12.4746, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.3603, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.6930, 12.6130, 12.7077,\n 12.6283, 12.7226, 12.6439, 12.7378, 12.8313, 12.9244, 13.0171, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.4510])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: it really is i heard something that their supposed to be starting a huge campaign in New York about um child abuse and stopping child abuse and it's supposed to be like it's starting there supposed to be like a big nationwide campaign and you know so hopefully that will take off and really do something i don't know there's just\nHypothesis: It's unfortunate that nobody is organizing a child abuse campaign.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.7441, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.2611, 1.2060, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.9570, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 10.8699,\n 10.9870, 10.8790, 10.7722, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.1491, 12.2541, 12.1568, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.7926, 13.7054, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.2686, 14.1842, 14.2744,\n 14.1906, 14.1074, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.3011,\n 14.3897, 14.4780, 14.3970, 14.4850, 14.5726, 14.6599, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Postal Service were to reduce delivery frequency.\nHypothesis: The postal service could deliver less frequently.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.4444, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.7143, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.7971, -0.6623, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.7905, 3.9837, 4.1740, 4.3614, 4.5461, 4.4371, 4.3301,\n 4.5115, 4.4061, 4.5847, 4.4809, 4.3788, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.1640,\n 5.3245, 5.2281, 5.1332, 5.2915, 5.4482, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.4772, 5.3865, 5.2970, 5.2086, 5.1212, 5.2719, 5.1855,\n 5.3345, 5.2489, 5.1643, 5.0807, 5.2278, 5.3736, 5.5181, 5.4349,\n 5.3526, 5.4956, 5.6373, 5.7778, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.2991, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.8757, 6.7952, 6.9237, 7.0513, 7.1779, 7.0980, 7.0187, 6.9402,\n 6.8624, 6.9879, 7.1125, 7.2363, 7.1590, 7.2818, 7.4039, 7.5251,\n 7.6456, 7.7653, 7.8842, 8.0024, 8.1198, 8.2365, 8.1594, 8.0829,\n 8.1988, 8.3140, 8.4286, 8.3526, 8.2772, 8.3910, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.7647, 8.8752, 8.9851, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.7574, 9.6850, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: And in another shift in the economy, it was found that lamb could be raised more cost-effectively on lowland farms in part because of the richer, more nutritious grazing land available there and as a result Lakeland farms became less profitable.\nHypothesis: Another shift in the economy was found to be more nutritious.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.1997, 2.1094, 2.0207,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.6524, 1.8257, 1.7552, 1.6854, 1.6164, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.6823, 1.8419, 1.7778, 1.7143, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 2.1082,\n 2.0470, 1.9863, 1.9261, 2.0739, 2.2205, 2.1602, 2.3054, 2.2454,\n 2.3891, 2.3293, 2.4717, 2.4121, 2.3529, 2.2943, 2.4348, 2.3764,\n 2.3183, 2.2608, 2.3995, 2.3422, 2.2852, 2.4225, 2.3657, 2.5019,\n 2.6370, 2.5802, 2.7143, 2.8475, 2.7906, 2.9227, 2.8660, 2.8098,\n 2.7539, 2.6984, 2.6433, 2.5886, 2.7186, 2.6640, 2.7930, 2.9212,\n 2.8666, 2.8124, 2.7585, 2.8853, 2.8316, 2.7783, 2.7253, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.5898, 2.7137, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.3603, 2.3110, 2.4327, 2.3835,\n 2.5044, 2.4553, 2.4065, 2.3580, 2.3098, 2.2618, 2.2141, 2.3333,\n 2.2857, 2.2384, 2.3567, 2.4744, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.3614, 4.5461, 4.7281, 4.6188,\n 4.5115, 4.6904, 4.5847, 4.7610, 4.9348, 5.1065, 5.0019, 4.8990,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.6986, 5.8522, 6.0041, 5.9084,\n 5.8139, 5.7207, 5.8707, 6.0193, 6.1664, 6.0740, 5.9827, 6.1283,\n 6.0380, 6.1820, 6.3248, 6.4663, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.2443, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.2413, 8.3605, 8.4788, 8.3977, 8.3172,\n 8.2375, 8.1585, 8.2760, 8.1976, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.0601, 9.1694, 9.0944, 9.0200, 9.1287,\n 9.0548, 9.1629, 9.2704, 9.3774, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.5089, 9.6130, 9.5416, 9.6452, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.4603, 10.3923, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The rule contains information collection requirements which will allow EPA to determine that detergent additives which are effective in controlling deposits are used and that emission control goals are realized.\nHypothesis: The rule has data collection requirements which aid the EPA to realize their emission control goals.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.8240, 2.0428, 1.9462, 1.8516,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.2916, 2.1997, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 2.1546, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.9887, 2.1678, 2.0913, 2.0158, 2.1918, 2.1167, 2.0426, 1.9695,\n 2.1420, 2.0692, 2.2393, 2.1669, 2.0954, 2.2629, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.7778, 1.7143, 1.6514, 1.8084, 1.9640,\n 2.1182, 2.0548, 1.9920, 2.1442, 2.0817, 2.0197, 1.9582, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.0739, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.4335, 1.5714, 1.5187, 1.4662, 1.6028, 1.7384, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.5731, 1.5236, 1.6530, 1.7817,\n 1.9097, 1.8598, 1.8102, 1.9370, 1.8875, 1.8383, 1.7894, 1.9149,\n 2.0396, 1.9906, 1.9419, 2.0656, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.8058, 1.7592, 1.7128, 1.8333,\n 1.9533, 2.0726, 2.0259, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.6813, 9.5743, 9.4685, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 11.9455, 11.8571, 11.7696, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 11.8172, 11.9181, 12.0185, 11.9341, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.7735, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.5647, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.6789, 13.7679, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Almost every hill has to the northeast there's a Moorish fort; no fewer than four ruined fortresses guard the harbour entrance; and two more, still in good repair the Atalaya and Galeras castles protect the sea-front arsenal, of vital importance to Spain's military.\nHypothesis: There are no castles Atalaya and Galeras.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "174", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "20.7%", + "z-score": "-1.31", + "p value": "0.905", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.2743, -2.0294, -2.0870, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.5164, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.0050, -1.0513, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.2730, -1.3131])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.6307, 9.7473, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.9124, 10.0261, 9.9373, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.2592, 10.1745, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.6196, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.0004, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.1447, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.1270, 12.2209, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Were they in there?\nHypothesis: Were they supposed to be in there?\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 2.8301, 2.6943, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 2.6681, 2.5538, 2.7791, 3.0000,\n 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.0817, 3.2796, 3.1787, 3.0796, 3.2733, 3.4641,\n 3.3657, 3.2691, 3.4562, 3.6407, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.8000, 3.7087, 3.6187, 3.5301, 3.7025, 3.8730,\n 4.0415, 3.9530, 3.8657, 4.0316, 3.9452, 4.1090, 4.0234, 4.1851,\n 4.3451, 4.2601, 4.1761, 4.3339, 4.2507, 4.1684, 4.0872, 4.2426,\n 4.1621, 4.0825, 4.0038, 4.1569, 4.0788, 4.0016, 4.1528, 4.0762,\n 4.2258, 4.3740, 4.2977, 4.2222, 4.1475, 4.2938, 4.4388, 4.5826,\n 4.5079, 4.6503, 4.7916, 4.9317, 4.8572, 4.7834, 4.7104, 4.6380,\n 4.5663, 4.7044, 4.6332, 4.5626, 4.4927, 4.4234, 4.3547, 4.4907,\n 4.6258, 4.5573, 4.4895, 4.6232, 4.7559, 4.8878, 4.8200, 4.7527,\n 4.8833, 5.0130, 4.9460, 4.8795, 5.0080, 4.9419, 4.8763, 4.8113,\n 4.9385, 5.0649, 5.0000, 5.1255, 5.0609, 5.1854, 5.3092, 5.2449,\n 5.1810, 5.3038, 5.4257, 5.3621, 5.4832, 5.4199, 5.3571, 5.4772,\n 5.5967, 5.5340, 5.4718, 5.5904, 5.5284, 5.4670, 5.4059, 5.3452,\n 5.4626, 5.4023, 5.3423, 5.2827, 5.2235, 5.1647, 5.1063, 5.0483,\n 4.9906, 4.9333, 4.8763, 4.8197, 4.9351, 5.0499, 5.1642, 5.1075,\n 5.0513, 4.9953, 4.9397, 4.8845, 4.9975, 4.9425, 5.0548, 5.1667,\n 5.2779, 5.2229, 5.1681, 5.1137, 5.0595, 5.0057, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "7.96e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.1905, 9.1119, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.3113,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.3040, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.5089, 9.4375, 9.3665, 9.4707, 9.5743,\n 9.5038, 9.4338, 9.3642, 9.4673, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.5673, 9.6684, 9.7690, 9.7011, 9.6336, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.9641, 9.8974, 9.9957])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Felicia's Journey takes place behind the eyes of its central a young Irish girl, Felicia, who crosses the sea to England in a hopeful quest to find the father of her unborn child; and the fat, middle-aged catering manager, Hiditch, who takes a paternal interest in the lass when it becomes clear that her young man has caddishly given her the slip.\nHypothesis: The woman did not care where the man was as long as it was far.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.4553, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.2740, -2.3094, -2.3447, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 4.9316, 4.8038, 4.9962, 5.1854, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 7.0133, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.7997, 9.9067, 9.8293, 9.7526, 9.6764,\n 9.6008, 9.5258, 9.6322, 9.7380, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.0371, 10.1398, 10.0668, 9.9944, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.2565, 10.1855, 10.1149, 10.0448,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In the crypt are interred the remains of Voltaire and Rousseau, Hugo and Zola, assassinated Socialist leader Jean Jaurys, and Louis Braille, the inventor of the alphabet for the blind.\nHypothesis: The remains of Voltaire and Rousseau, Hugo and Zola, assassinated Socialist leader Jean Jaurys, and Louis Braille are all interred in the crypt.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "5", + "Fraction of T in Greenlist": "2.5%", + "z-score": "-7.33", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.8295, -2.8808, -2.9314, -2.9814, -3.0308, -3.0796, -3.1278, -3.1754,\n -3.2225, -3.2691, -3.3151, -3.3607, -3.4057, -3.4503, -3.4945, -3.5382,\n -3.5814, -3.6242, -3.6667, -3.7087, -3.7503, -3.7916, -3.8325, -3.8730,\n -3.9132, -3.9530, -3.9925, -4.0316, -4.0705, -4.1090, -4.1472, -4.1851,\n -4.2227, -4.2601, -4.2971, -4.3339, -4.3704, -4.4066, -4.4426, -4.4783,\n -4.5138, -4.5491, -4.5840, -4.6188, -4.6533, -4.6876, -4.7217, -4.7556,\n -4.7892, -4.8226, -4.8559, -4.8889, -4.9217, -4.9543, -4.9868, -5.0190,\n -5.0511, -5.0829, -5.1146, -5.1461, -5.1775, -5.2086, -5.2396, -5.2705,\n -5.3011, -5.3316, -5.3620, -5.3921, -5.4222, -5.4521, -5.4818, -5.5114,\n -5.5408, -5.5701, -5.5992, -5.6282, -5.6571, -5.6858, -5.7144, -5.7429,\n -5.7712, -5.7994, -5.8275, -5.8554, -5.8832, -5.9109, -5.9385, -5.9660,\n -5.9933, -6.0205, -6.0476, -6.0746, -6.1015, -6.1283, -6.1549, -6.1815,\n -6.2079, -6.2342, -6.2605, -6.2866, -6.3126, -6.3385, -6.3644, -6.3901,\n -6.4157, -6.4413, -6.4667, -6.4920, -6.5173, -6.5424, -6.5675, -6.5924,\n -6.6173, -6.6421, -6.6668, -6.6914, -6.7160, -6.7404, -6.7648, -6.7890,\n -6.8132, -6.8373, -6.8614, -6.8853, -6.9092, -6.9330, -6.9567, -6.9803,\n -7.0039, -7.0273, -7.0507, -7.0741, -7.0973, -7.1205, -7.1436, -7.1667,\n -7.1896, -7.2125, -7.2354, -7.2581, -7.2808, -7.3034, -7.3260])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712, 3.5796, 3.8497, 3.6667,\n 3.9279, 4.1812, 4.4272, 4.6663, 4.4907, 4.7237, 4.5547, 4.3916, 4.6188,\n 4.4610, 4.3083, 4.5301, 4.7469, 4.9592, 5.1671, 5.3708, 5.2223, 5.4222,\n 5.2778, 5.4740, 5.6667, 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140,\n 5.6830, 5.5549, 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828,\n 6.0622, 6.2302, 6.1118, 5.9954, 6.1612, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.2505, 6.4065, 6.5607, 6.4550, 6.6075,\n 6.5033, 6.6541, 6.8034, 6.7006, 6.5993, 6.4993, 6.4008, 6.5483, 6.4510,\n 6.5970, 6.7416, 6.8849, 7.0268, 6.9305, 7.0711, 6.9759, 6.8819, 7.0211,\n 6.9282, 6.8364, 6.7456, 6.8834, 7.0201, 6.9303, 6.8414, 6.9768, 6.8889,\n 7.0231, 7.1563, 7.0692, 7.2012, 7.1149, 7.2459, 7.3758, 7.2904, 7.2058,\n 7.1220, 7.0391, 7.1678, 7.0857, 7.2134, 7.3402, 7.4661, 7.5910, 7.5094,\n 7.6335, 7.5526, 7.4724, 7.5955, 7.5161, 7.4373, 7.3592, 7.4813, 7.6026,\n 7.5251, 7.6456, 7.5687, 7.6883, 7.8072, 7.7308, 7.8489, 7.7732, 7.8905,\n 8.0070, 7.9318, 7.8571, 7.7831, 7.7096, 7.8253, 7.7524, 7.8673, 7.9816,\n 8.0952, 8.2082, 8.1356, 8.2479, 8.1758, 8.1043, 8.2158, 8.1448, 8.0742,\n 8.0042, 8.1150, 8.2252, 8.1556, 8.0865, 8.1960, 8.1273, 8.2362, 8.3446,\n 8.2762, 8.3840, 8.3161, 8.4232, 8.5298, 8.4623, 8.3952, 8.3286, 8.2624,\n 8.3683, 8.3024, 8.4078, 8.5126, 8.6169, 8.7207, 8.6551, 8.7584, 8.6932,\n 8.6284, 8.7311, 8.6667, 8.6026, 8.5390, 8.6411, 8.7427, 8.6794, 8.7805,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: there's a uh a couple called um oh i'm going to forgot his name now uh Dirkson\nHypothesis: I can't remember their name\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.3389, 1.2710, 1.2039, 1.3770, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 1.1183, 1.0659, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.0735, 1.0235, 1.1593, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.6336, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.6555, 1.7780, 1.7310, 1.6843, 1.8058, 1.7592, 1.8799, 1.8333,\n 1.9533, 1.9068, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "58.9%", + "z-score": "6.69", + "p value": "1.12e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735, 5.4611, 5.1711,\n 5.4306, 5.6804, 5.4175, 5.1698, 4.9358, 4.7140, 4.9652, 5.2085, 5.4444,\n 5.6737, 5.4678, 5.6921, 5.9106, 6.1237, 6.3317, 6.5350, 6.7337, 6.5433,\n 6.3594, 6.1815, 6.0093, 6.2075, 6.4019, 6.2361, 6.0751, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.3333, 6.5158, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425,\n 6.3058, 6.4807, 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.5991, 6.4738,\n 6.6395, 6.8031, 6.6803, 6.5597, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Poirot, I exclaimed, with relief, and seizing him by both hands, I dragged him into the room. \nHypothesis: Poirot was now back and I was sorry that he would take over what I now considered my own investigation. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 1.2019, 1.4606,\n 1.7132, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.6667, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.4403, 1.6187, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.9437, 2.1049, 2.0381,\n 1.9720, 1.9066, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.0817, 2.0197, 2.1700, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.4271, 2.5717, 2.5103, 2.6536,\n 2.5925, 2.5318, 2.4717, 2.4121, 2.3529, 2.2943, 2.2361, 2.3764,\n 2.3183, 2.4574, 2.3995, 2.3422, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.3891, 2.5238, 2.4678, 2.4122, 2.3570, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.5886, 2.7186, 2.8478, 2.7930, 2.7386,\n 2.6846, 2.8124, 2.7585, 2.7050, 2.6519, 2.5990, 2.7253, 2.6726,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.0212, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "155", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "61.9%", + "z-score": "10.6", + "p value": "1.21e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.4563, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 10.0984, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 10.9497, 10.8631, 10.7772,\n 10.8838, 10.7987, 10.7143, 10.6306, 10.7367, 10.6537, 10.7594, 10.6771,\n 10.5955, 10.7006, 10.6196])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The word aswan actually means trade or market in ancient Egyptian, signifying its most pre-eminent activity.\nHypothesis: Aswan's meaning has not changed over time.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "62.1%", + "z-score": "12.1", + "p value": "8.29e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 5.9214, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.1621, 10.0698, 9.9783, 9.8877, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.1807, 11.1018, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.6297, 11.5549, 11.6514, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.6217, 11.7169, 11.6441, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 12.0630])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well what the market can bear and\nHypothesis: THe market can't bear any of it.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 1.1476, 1.3308, 1.2599, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.3101, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 1.0370, 0.9812, 0.9258, 1.0759, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.3128, 1.4535, 1.5933, 1.5396,\n 1.6781, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.7158, 1.6641, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.6378, 1.5916, 1.5457, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: And to show just how fast Japan's new rulers were catching on, two punitive expeditions were launched against Korea and China in the grand manner of 19th-century gunboat diplomacy.\nHypothesis: Japan's new rulers were catching on quickly.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.0706, -3.1071, -3.1433, -3.1794, -3.2152, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.2460, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.4140, -3.4478, -3.4816,\n -3.5151, -3.5485, -3.5817, -3.6148, -3.6477, -3.6805, -3.7131, -3.5753,\n -3.6080, -3.6407, -3.6731, -3.7055, -3.7376, -3.7697, -3.8016, -3.8333,\n -3.8649, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.3221, 3.5590, 3.7905, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 6.8354,\n 6.9759, 6.8819, 6.7890, 6.9282, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.2001, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.9254, 8.0497, 7.9649, 7.8808, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.7831, 8.8978, 9.0117, 8.9319, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.2591, 10.1846, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 10.8702, 10.7978, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: When the trust fund begins running cash deficits in 2016, the government as a whole must come up with the cash to finance Social Security's cash deficit by reducing any projected non-Social Security surpluses, borrowing from the public, raising other taxes, or reducing other government spending.\nHypothesis: The public would generally prefer to see the government reduce its spending in other areas to finance Social Security.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.5718, -2.6135, -2.6550, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -3.0792,\n -3.1165, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.2206, -3.2567, -3.2925, -3.1433, -3.1794, -3.2152, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.2811, -3.3160, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.8194, -3.6805, -3.7131, -3.7455,\n -3.6080, -3.6407, -3.6731, -3.7055, -3.7376, -3.7697, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "156", + "Fraction of T in Greenlist": "78.4%", + "z-score": "17.4", + "p value": "4.57e-68", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 6.9830, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 8.6828, 8.8426, 9.0000,\n 9.1551, 8.9815, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 10.2404, 10.3758, 10.5096, 10.3621, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.7442, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.4829, 11.3489, 11.4704, 11.5909, 11.7104, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 11.7978, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.4708, 12.5812, 12.6909, 12.7998, 12.6815,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 12.8813, 12.9875, 13.0931,\n 13.1979, 13.3022, 13.4057, 13.5086, 13.6109, 13.4999, 13.6019, 13.7032,\n 13.8039, 13.6950, 13.5871, 13.6878, 13.7878, 13.8873, 13.9862, 13.8804,\n 13.9790, 14.0771, 14.1746, 14.2715, 14.3680, 14.4639, 14.5593, 14.4562,\n 14.5513, 14.6459, 14.7400, 14.6385, 14.5379, 14.6319, 14.7255, 14.8187,\n 14.9113, 14.8124, 14.9048, 14.9967, 15.0882, 15.1792, 15.2698, 15.3600,\n 15.4498, 15.3530, 15.4425, 15.5316, 15.6203, 15.5249, 15.4302, 15.5188,\n 15.6070, 15.6949, 15.7823, 15.6891, 15.7763, 15.8631, 15.9496, 16.0357,\n 16.1214, 16.2068, 16.2917, 16.2003, 16.2851, 16.3695, 16.4536, 16.3633,\n 16.2736, 16.3577, 16.4414, 16.5247, 16.6078, 16.5193, 16.6021, 16.6846,\n 16.7668, 16.8487, 16.9302, 17.0115, 17.0924, 17.0055, 17.0862, 17.1667,\n 17.2468, 17.1609, 17.0754, 17.1556, 17.2354, 17.3149, 17.3941])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: These organizations invest the time and effort to understand their processes and how those processes contribute to or hamper mission accomplishment.\nHypothesis: These organizations invest lots of time to understand how some processes can contribute to or haampe\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.5283, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.5548, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "62.1%", + "z-score": "12.1", + "p value": "8.29e-34", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 8.7967, 8.9455, 9.0924, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.6719, 9.8058, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.2132, 10.3397,\n 10.4650, 10.3459, 10.4704, 10.3532, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.2587, 10.1479, 10.2706, 10.3923, 10.2833, 10.4042, 10.5243, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.5909, 10.7066, 10.6061, 10.5067, 10.6218, 10.7362, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 11.0897,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.2789, 11.3812, 11.4829, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.3610, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.5234, 11.6220, 11.7200, 11.6425, 11.5655, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.7687, 11.8638, 11.9586, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 12.0209, 12.1141, 12.0419, 11.9701, 12.0630])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: and it is nice talking to you all righty\nHypothesis: I talk to you every day.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.6430, 0.7784, 0.9129,\n 0.8645, 0.9979, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.7102, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712, 3.5796, 3.8497, 3.6667,\n 3.9279, 3.7524, 3.5839, 3.8367, 3.6742, 3.9196, 4.1586, 4.0012, 3.8490,\n 4.0814, 4.3083, 4.1603, 4.0166, 4.2378, 4.4544, 4.3142, 4.1779, 4.0451,\n 3.9158, 4.1265, 4.3333, 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854,\n 5.3716, 5.2463, 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919,\n 5.7735, 5.6573, 5.8275, 5.7133, 5.8812, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.3008, 6.1968, 6.0943,\n 6.2483, 6.1471, 6.2994, 6.1996, 6.1012, 6.2517, 6.1546, 6.3035, 6.4510,\n 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.3640, 6.5069, 6.6486, 6.5569,\n 6.4663, 6.3768, 6.2883, 6.4283, 6.5672, 6.7049, 6.8414, 6.9768, 7.1111,\n 7.2443, 7.1563, 7.2884, 7.2012, 7.3322, 7.4622, 7.5912, 7.7192, 7.8463,\n 7.7598, 7.6742, 7.5895, 7.5056, 7.6315, 7.5484, 7.6734, 7.5910, 7.5094,\n 7.6335, 7.5526, 7.6758, 7.7981, 7.9196, 8.0403, 8.1602, 8.0798, 8.0002,\n 7.9212, 7.8429, 7.9619, 7.8842, 8.0024, 7.9253, 7.8489, 7.9663, 7.8905,\n 8.0070, 8.1229, 8.0476, 7.9729, 8.0880, 8.2024, 8.1282, 8.0546, 8.1683,\n 8.2813, 8.2082, 8.1356, 8.0636, 8.1758, 8.2874, 8.3984, 8.5088, 8.6186,\n 8.7278, 8.8364, 8.9444, 8.8726, 8.8013, 8.7305, 8.6603, 8.7676, 8.6978,\n 8.8045, 8.7351, 8.6662, 8.7724, 8.7039, 8.8094, 8.9145, 9.0190, 9.1230,\n 9.2265, 9.1584, 9.0906, 9.0233, 8.9565, 9.0593, 8.9929, 9.0952, 9.0292,\n 8.9635, 9.0653, 9.0000, 9.1013, 9.2022, 9.1372, 9.0726, 9.1730, 9.2729,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: You have access to the facts. \nHypothesis: The facts are accessible to you.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "26.2%", + "z-score": "0.279", + "p value": "0.39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "50.0%", + "z-score": "8.08", + "p value": "3.16e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.3217, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.0587, 5.9641, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.3248, 6.4663, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.0000, 5.9171, 6.0553, 6.1924, 6.3283,\n 6.2459, 6.1644, 6.0837, 6.2183, 6.1382, 6.0590, 6.1923, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.8439, 6.9714, 7.0980, 7.2236, 7.1443,\n 7.0658, 7.1904, 7.1125, 7.0353, 7.1590, 7.2818, 7.2051, 7.1291,\n 7.0537, 7.1755, 7.2966, 7.4168, 7.5364, 7.4613, 7.3869, 7.5056,\n 7.4317, 7.3584, 7.4762, 7.5933, 7.5204, 7.6368, 7.5644, 7.4927,\n 7.4215, 7.3508, 7.2807, 7.3960, 7.5106, 7.6246, 7.5548, 7.4855,\n 7.4168, 7.3485, 7.2807, 7.2134, 7.3263, 7.4386, 7.5504, 7.4833,\n 7.4167, 7.5277, 7.6381, 7.7480, 7.6816, 7.7908, 7.7249, 7.6594,\n 7.5944, 7.5297, 7.4655, 7.4017, 7.3383, 7.4465, 7.5542, 7.6613,\n 7.5981, 7.5353, 7.4729, 7.5794, 7.5173, 7.4556, 7.5614, 7.6667,\n 7.7715, 7.8758, 7.9796, 8.0829])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Build environment Engineering Manufacturing Production (all rate tooling) (1st set of production tooling)\nHypothesis: It is the first set of production tooling for manufacturing.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.0563, -0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.0416, 0.0829, 0.2067, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 13.8804, 13.9735, 13.8857, 13.7986, 13.8914, 13.9838, 13.8976,\n 13.8120, 13.9042, 13.9959, 13.9111, 14.0025, 13.9185, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.2939, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.1761, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: As long you have your own household in order, fretting about your neighbor's spending habits is a lot like fretting about the color of his living-room rug.\nHypothesis: You should worry about the color of your neighbor's rug. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.0889, 1.9795, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.5873, 2.7863, 2.6914, 2.5981,\n 2.5064, 2.7005, 2.6098, 2.8006, 2.7107, 2.6222, 2.8093, 2.9938,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.2686, 3.1829, 3.0984,\n 3.2717, 3.1879, 3.1052, 3.0237, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.8764, 3.0429, 3.2077, 3.1300, 3.0533, 3.2157, 3.3764, 3.2998,\n 3.2242, 3.1493, 3.3075, 3.2332, 3.1597, 3.3156, 3.2426, 3.1704,\n 3.3243, 3.4768, 3.6279, 3.5556, 3.4839, 3.4130, 3.3428, 3.2733,\n 3.2044, 3.1363, 3.2841, 3.2163, 3.1492, 3.0827, 3.2285, 3.3731,\n 3.5166, 3.4499, 3.3838, 3.5256, 3.6664, 3.6004, 3.5350, 3.4701,\n 3.6091, 3.5446, 3.4806, 3.6181, 3.7547, 3.6908, 3.6274, 3.5645,\n 3.5022, 3.4403, 3.3789, 3.3181, 3.4521, 3.5853, 3.7176, 3.6566,\n 3.7878, 3.9181, 4.0476, 3.9865, 4.1150, 4.0541, 4.1816, 4.1210,\n 4.0608, 4.0011, 3.9418, 3.8829, 4.0087, 4.1338, 4.2582, 4.1992,\n 4.1406, 4.0825, 4.0247, 3.9673, 3.9104, 3.8538, 3.9762, 3.9198,\n 3.8638, 3.8081, 3.9294, 4.0501, 4.1700, 4.1143, 4.0589, 4.1779,\n 4.2962, 4.2409, 4.1859, 4.1312, 4.2485, 4.1940, 4.3106, 4.2563,\n 4.2023, 4.1487, 4.0953, 4.0423, 3.9896, 4.1048, 4.0522, 4.0000,\n 3.9481, 3.8964, 3.8451, 3.9590, 3.9078, 3.8569, 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 8.8389, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.1117, 12.2207, 12.1136, 12.2221, 12.3299, 12.4370, 12.3317, 12.2275,\n 12.3343, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.4567, 12.5604, 12.4625, 12.5657, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.6939, 12.7943, 12.7017,\n 12.8017, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.4086, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.6117, 13.5250, 13.6188, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.7270, 13.8193, 13.7350, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.3970, 14.4850, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.7533, 14.8396, 14.7601, 14.8462, 14.9318, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah so um also of course they they can they join the they can always join the military service they are considered citizens i believe\nHypothesis: They can't join the military service\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "35.7%", + "z-score": "3.48", + "p value": "0.000252", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.8677, 2.0426, 2.2156,\n 2.3868, 2.3126, 2.2393, 2.4077, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.7143, 1.8716, 1.8084, 1.9640,\n 2.1182, 2.2711, 2.2074, 2.3586, 2.2952, 2.4449, 2.3817, 2.3190,\n 2.4669, 2.4045, 2.3426, 2.4887, 2.4271, 2.3660, 2.3054, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 2.1210, 2.0642, 2.2037, 2.3422, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.1980, 2.3333, 2.2780, 2.4122, 2.5456, 2.6781, 2.8098,\n 2.9406, 2.8845, 3.0143, 2.9584, 2.9029, 2.8478, 2.7930, 2.7386,\n 2.8666, 2.9938, 3.1203, 3.0657, 3.1912, 3.1368, 3.0827, 3.0290,\n 2.9756, 2.9225, 3.0464, 3.1696, 3.2921, 3.2389, 3.3606, 3.3075,\n 3.2547, 3.2023, 3.1502, 3.0984, 3.2186, 3.3381, 3.4570, 3.4050,\n 3.5232, 3.4713, 3.4198, 3.3686, 3.3177, 3.2671, 3.3838, 3.5000,\n 3.6156, 3.5648, 3.6797, 3.6291, 3.5787, 3.5286, 3.4788])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "60.0%", + "z-score": "11.3", + "p value": "7.59e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 6.6469, 6.3805, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.7543, 6.5354, 6.3254, 6.1237,\n 6.3317, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.0404, 5.9333, 6.0928, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.8377, 5.7382, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.2588, 8.3813, 8.2956, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.5840, 9.6921, 9.6148, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.9184, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.0074, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 10.9220, 10.8505, 10.9480, 11.0450, 11.1415, 11.2376, 11.1667,\n 11.0961, 11.1919, 11.2872])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: She smiled back.\nHypothesis: She was so happy she couldn't stop smiling.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.5511, 1.7150, 1.6498, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.8385, 1.7823,\n 1.7264, 1.8676, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.8699, 2.0068, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 1.8773, 2.0083,\n 1.9566, 1.9052, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.9599,\n 2.0873, 2.2140, 2.1634, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.0170, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 2.0212, 2.1418, 2.2618, 2.2141, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.1444, 2.0979, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140, 4.5033, 4.3027, 4.5556,\n 4.8008, 4.6101, 4.8488, 4.6663, 4.8990, 4.7237, 4.5547, 4.3916, 4.6188,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.9592, 4.8107, 5.0186, 4.8742, 4.7336,\n 4.5968, 4.8003, 4.6667, 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854,\n 5.0602, 4.9377, 4.8177, 5.0034, 4.8857, 5.0684, 5.2485, 5.1326, 5.3100,\n 5.1962, 5.3709, 5.2590, 5.1490, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.4610, 5.3605, 5.2615, 5.4222, 5.3245,\n 5.4832, 5.3867, 5.5435, 5.6986, 5.6032, 5.7566, 5.6622, 5.5691, 5.4772,\n 5.6286, 5.5377, 5.6875, 5.8358, 5.7458, 5.8926, 5.8035, 5.9488, 5.8606,\n 5.7735, 5.6874, 5.8310, 5.7457, 5.8878, 6.0288, 5.9442, 6.0838, 6.0000,\n 6.1383, 6.0553, 5.9732, 5.8919, 6.0287, 5.9481, 6.0837, 6.2183, 6.1382,\n 6.2716, 6.4040, 6.5354, 6.6658, 6.5861, 6.5072, 6.4291, 6.5583, 6.4807,\n 6.6089, 6.5320, 6.6591, 6.7854, 6.9107, 7.0353, 7.1590, 7.0823, 7.0063,\n 6.9310, 7.0537, 6.9789, 7.1007, 7.2217, 7.1474, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.5495, 7.4762, 7.4034, 7.5204, 7.4482, 7.5644, 7.4927, 7.6082,\n 7.7230, 7.8372, 7.9507, 8.0636, 7.9921, 7.9211, 7.8507, 7.9628, 7.8928,\n 8.0042, 8.1150, 8.0455, 8.1556, 8.2652, 8.3742, 8.4826, 8.4133, 8.3446,\n 8.2762, 8.3840, 8.3161, 8.4232, 8.5298, 8.4623, 8.5683, 8.6738, 8.7788,\n 8.8832, 8.8160, 8.7492, 8.6828, 8.7867, 8.7207, 8.8240, 8.9268, 8.8612,\n 8.9635, 9.0653, 9.1667, 9.2676, 9.2022, 9.1372, 9.0726, 9.1730, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: According to the Office of the Actuary at the Health Care Financing Administration, the estimated net present value of future additional resources needed to fund HI benefits alone over the 75 years is $4.\nHypothesis: The net present value of future additional resources for funding HI benefits was $4.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.4%", + "z-score": "1.11", + "p value": "0.133", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.3242, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, 0.1459, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.4714, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.7044, 0.8340, 0.7878, 0.7419, 0.8704,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.8866, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 1.1221, 1.0777, 1.0336, 1.1547, 1.1106])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.6541, 6.8034, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.2650, 7.4044, 7.3068,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.6867, 7.5967, 7.7268, 7.6376,\n 7.5494, 7.6785, 7.5912, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.8406, 7.9649, 7.8808, 8.0042, 8.1266, 8.0434, 7.9608,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.2760, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.6556, 8.7681, 8.6921, 8.6166,\n 8.7284, 8.6535, 8.7647, 8.8752, 8.9851, 8.9107, 9.0200, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.0167, 8.9444, 9.0518, 8.9800, 8.9087,\n 9.0155, 9.1218, 9.2276, 9.1567, 9.2619, 9.1915, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.3642, 9.4673, 9.5698, 9.5007, 9.6028, 9.5341,\n 9.6356, 9.7367, 9.8373, 9.9374, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.2650, 10.3628, 10.4603, 10.3923, 10.3248, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Thus, with respect to the litigation services Congress has funded, there is no alternative channel for expression of the advocacy Congress seeks to restrict.\nHypothesis: This is the only channel of expression of the advocacy that Congress seeks to restrict.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.1253, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.4175, 5.1698, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.3496, 10.4636, 10.3695, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.9301, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.0102, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 10.9317, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.1018, 11.0235, 11.1245, 11.0468,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: How do we fix this?'\nHypothesis: Can we fix this?\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.3943, 1.3373, 1.2808, 1.4289,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.5187, 1.4662, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.5423, 1.6732, 1.6230, 1.5731, 1.7028, 1.6530, 1.6036,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.8799, 1.8333,\n 1.7870, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.5916, 7.4536, 7.3183, 7.4790, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.7277, 8.6164,\n 8.7515, 8.6418, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.9813, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.1865, 10.0995,\n 10.2106, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.4596, 10.3805, 10.4852, 10.4067,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.8186, 10.9178, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: but that takes too much planning\nHypothesis: It doesn't take much planning.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "88", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "38.6%", + "z-score": "2.95", + "p value": "0.00157", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 1.8970, 1.7963, 2.0211, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.6765, 2.8808, 2.7811, 2.9814, 2.8830, 2.7863, 2.6914, 2.8868,\n 2.7928, 2.7005, 2.6098, 2.5205, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.2819, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.5820,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.9433, 3.1129, 3.0330, 2.9542])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.7250, 8.8667, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 10.7835, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.7130, 11.8212, 11.7222, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.7279, 12.6387, 12.7376,\n 12.8359, 12.9337, 12.8456, 12.7581, 12.8556, 12.9527, 12.8661, 12.9628,\n 13.0590, 12.9732, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.0593, 13.9797, 14.0687, 14.1573, 14.2455, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: I did not mention Monica in my lecture, but the first question I was asked was how President Clinton could do his job with all the distractions caused by the Monica Lewinsky affair.\nHypothesis: They wanted to get through the lecture without any problems.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.0196, -2.0692, -2.1183, -2.1669, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.1256, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.3422, -2.1880, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.1685, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.2406, -2.2785, -2.3163,\n -2.1762, -2.2140, -2.0751, -2.1131, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "197", + "Fraction of T in Greenlist": "99.0%", + "z-score": "24.1", + "p value": "1.08e-128", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 8.2923, 8.4697, 8.6436, 8.8141, 8.9815,\n 9.1458, 9.3074, 9.4662, 9.6225, 9.7763, 9.9278, 10.0771, 10.2242,\n 10.3692, 10.5123, 10.6535, 10.7928, 10.9304, 11.0663, 11.2006, 11.3333,\n 11.4645, 11.5943, 11.7226, 11.8495, 11.9751, 12.0994, 12.2224, 12.3443,\n 12.4649, 12.5844, 12.7028, 12.8201, 12.9364, 13.0516, 13.1658, 13.2791,\n 13.3913, 13.5027, 13.6132, 13.7227, 13.8315, 13.9393, 14.0464, 14.1526,\n 14.2581, 14.3627, 14.4667, 14.5699, 14.6723, 14.7741, 14.8751, 14.9755,\n 15.0753, 15.1743, 15.2728, 15.3706, 15.4677, 15.5643, 15.6603, 15.7557,\n 15.8505, 15.9448, 16.0385, 16.1317, 16.2243, 16.3165, 16.4081, 16.4992,\n 16.5898, 16.6799, 16.7695, 16.8586, 16.9473, 17.0355, 17.1233, 17.2106,\n 17.2975, 17.3839, 17.4700, 17.5556, 17.6407, 17.7255, 17.8099, 17.8939,\n 17.9775, 18.0607, 18.1435, 18.2259, 18.3080, 18.3897, 18.4710, 18.5520,\n 18.6327, 18.7130, 18.7929, 18.8725, 18.9518, 19.0307, 19.1094, 19.1877,\n 19.2657, 19.3433, 19.4207, 19.4977, 19.5745, 19.6509, 19.7271, 19.8030,\n 19.8785, 19.9538, 20.0288, 20.1035, 20.1780, 20.2522, 20.3261, 20.3997,\n 20.4731, 20.5462, 20.6190, 20.6916, 20.7640, 20.8361, 20.9079, 20.9795,\n 21.0509, 21.1220, 21.1929, 21.2635, 21.3339, 21.4041, 21.4740, 21.5438,\n 21.6132, 21.6825, 21.7516, 21.8204, 21.8890, 21.9574, 22.0256, 22.0936,\n 22.1614, 22.2289, 22.2963, 22.3635, 22.4304, 22.4972, 22.5637, 22.6301,\n 22.6963, 22.7622, 22.8280, 22.8936, 22.9590, 23.0243, 23.0893, 23.1542,\n 23.2189, 23.2834, 23.3477, 23.4118, 23.4758, 23.5396, 23.6032, 23.6667,\n 23.7300, 23.7931, 23.8560, 23.9188, 23.9814, 24.0439, 24.1062])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Analyzing Postal Service accounts for depreciation, fuel, and maintenance for city delivery carriers, we have estimated the average city delivery vehicle cost per route.\nHypothesis: Driving cost estimates can be averaged will sufficient data.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.9739, 0.9245, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 1.1794, 1.1305, 1.0820, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.1587, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.4699, 1.4241, 1.3786, 1.5000,\n 1.4546, 1.5752, 1.5298, 1.4846, 1.4397, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.5638, 9.4812, 9.3993, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.4124, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.2591, 10.3617, 10.2872, 10.3893, 10.3154, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.2993, 10.2273, 10.3280, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.7090, 10.8064, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Because it plays on my childhood imagination.\nHypothesis: The art plays on my young imagination.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.9935, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.3086, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.4858, 9.6156, 9.5021, 9.3901, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.8237, 9.9469, 10.0692, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.2615, 12.3655, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.4834, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.7885,\n 14.8779, 14.9669, 15.0555, 15.1438, 15.2316, 15.1440, 15.2316, 15.3188,\n 15.2321, 15.3191, 15.2331, 15.3198, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.4940, 15.5792, 15.6640, 15.7485, 15.8327, 15.9165, 16.0000,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: it's slow it's uh there are many better machines on the market right now for\nHypothesis: This is the fastest machine, you won't find a better machine.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.5855, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "58.9%", + "z-score": "6.69", + "p value": "1.12e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415, 4.3409, 4.6268,\n 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569, 5.8890, 6.1143, 6.3333,\n 6.5465, 6.7543, 6.5354, 6.7402, 6.9402, 7.1358, 7.3271, 7.1241, 6.9282,\n 7.1187, 7.3054, 7.4885, 7.3030, 7.4839, 7.3051, 7.1317, 6.9631, 7.1435,\n 7.3208, 7.4952, 7.3333, 7.1756, 7.0219, 6.8718, 6.7254, 6.5823, 6.4425,\n 6.3058, 6.1721, 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738,\n 6.3509, 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: At the heart of the sanctuary, a small granite shrine once held the sacred barque of Horus himself.\nHypothesis: Horus is a god.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.3578, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.4142, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.1243, 7.0268, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.9630, 7.8699, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.2956, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.0267, 8.9448, 8.8636, 8.9783, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.8131, 9.9184, 10.0231, 10.1273, 10.0523, 9.9778,\n 9.9038, 10.0074, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.3717, 10.2993, 10.3999, 10.5001, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.4846, 10.4140, 10.5128, 10.6111, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Consider the United States Postal Service.\nHypothesis: Forget the United States Postal Service.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "108", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "27.8%", + "z-score": "0.667", + "p value": "0.252", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.6667])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547, 0.9802, 1.3608,\n 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284, 3.1177, 3.3968, 3.2222,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.5744, 2.8301, 2.6943,\n 2.9424, 2.8098, 2.6811, 2.5560, 2.4345, 2.3163, 2.5533, 2.7852, 2.6681,\n 2.8943, 3.1160, 3.3333, 3.2167, 3.4293, 3.3147, 3.5228, 3.4101, 3.6141,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.3824, 3.2796, 3.1787, 3.3729, 3.2733,\n 3.4641, 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 2.8402, 2.7585,\n 2.6778, 2.5983, 2.5198, 2.4423, 2.6148, 2.7854, 2.7080, 2.6316, 2.5560,\n 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.1213, 2.2862, 2.4495, 2.3791,\n 2.3094, 2.4703, 2.4010, 2.3324, 2.2646, 2.1974, 2.3552, 2.5117, 2.4444,\n 2.5991, 2.7524, 2.9044, 3.0551, 2.9872, 2.9200, 2.8534, 2.7875, 2.7222,\n 2.8701, 2.8051, 2.7406, 2.6768, 2.6135, 2.7591, 2.9035, 3.0467, 3.1889,\n 3.1251, 3.0619, 2.9991, 3.1395, 3.0770, 3.0151, 3.1539, 3.0923, 3.2299,\n 3.1685, 3.1076, 3.0471, 3.1831, 3.1229, 3.2577, 3.1977, 3.1382, 3.0792,\n 3.0206, 2.9625, 2.9048, 2.8475, 2.9798, 2.9227, 2.8660, 2.9971, 2.9406,\n 2.8845, 3.0143, 2.9584, 2.9029, 2.8478, 2.7930, 2.7386, 2.6846, 2.6309,\n 2.7585, 2.8853, 2.8316, 2.7783, 2.7253, 2.6726, 2.6203, 2.5683, 2.5166,\n 2.4653, 2.4142, 2.5386, 2.6623, 2.6112, 2.7340, 2.8561, 2.9776, 2.9263,\n 3.0469, 2.9957, 3.1156, 3.0645, 3.0138, 2.9633, 2.9132, 2.8633, 2.8137,\n 2.7644, 2.8825, 3.0000, 2.9507, 3.0674, 3.1836, 3.2991, 3.4142, 3.3645,\n 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In full costume.\nHypothesis: He is wearing a mascot costume.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 2.7136, 2.9704, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.4816, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.3147, 3.2026, 3.0929, 3.2998, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 2.8006, 2.9887, 3.1743, 3.0833, 2.9938,\n 2.9057, 3.0873, 3.2667, 3.1789, 3.0924, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.7080,\n 2.6316, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.5403, 2.7001, 2.6296, 2.5600, 2.4910,\n 2.4228, 2.3552, 2.2884, 2.4444, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.2711, 2.4227, 2.3586, 2.2952, 2.2323, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.4254,\n 1.3768, 1.3284, 1.4570, 1.4087, 1.3607, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.1990, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 2.2576, 2.4689,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.5205, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.5504, 2.7333, 2.6491, 2.5660, 2.7456, 2.9231, 3.0984,\n 3.0151, 3.1879, 3.1052, 3.0237, 2.9433, 2.8638, 3.0330, 2.9542,\n 2.8764, 2.7995, 2.9656, 3.1300, 3.2928, 3.2157, 3.1394, 3.0641,\n 2.9897, 3.1493, 3.0754, 3.0022, 3.1597, 3.3156, 3.4702, 3.3968,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.4130, 3.3428, 3.2733,\n 3.2044, 3.3526, 3.4995, 3.6452, 3.5762, 3.5079, 3.4402, 3.3731,\n 3.5166, 3.4499, 3.3838, 3.5256, 3.6664, 3.8061, 3.7399, 3.8784,\n 3.8125, 3.7471, 3.6824, 3.6181, 3.7547, 3.6908, 3.6274, 3.5645,\n 3.6995, 3.8335, 3.9666, 3.9036, 3.8411, 3.7791, 3.7176, 3.8490,\n 3.7878, 3.7270, 3.8571, 3.9865, 4.1150, 4.0541, 4.1816, 4.1210,\n 4.0608, 4.0011, 3.9418, 4.0678, 4.0087, 3.9501, 3.8919, 4.0166,\n 4.1406, 4.2639, 4.2056, 4.1477, 4.0901, 4.0330, 4.1549, 4.0980,\n 4.0415, 4.1624, 4.2827, 4.4023, 4.3456, 4.4644, 4.4080, 4.3519,\n 4.2962, 4.2409, 4.3585, 4.3033, 4.2485, 4.1940, 4.3106, 4.4265,\n 4.5419, 4.4873, 4.4331, 4.3792, 4.3256, 4.4399, 4.3864, 4.3333,\n 4.4468, 4.5596, 4.6720, 4.6188, 4.7305, 4.6775, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Although your journey is going to be difficult and at times you will doubt yourselves, the benefits to clients are well worth it and the satisfaction of hearing even the most recalcitrant of adversaries say that the new system is better than the one that went before is deeply rewarding.\nHypothesis: The new system appears far more complex, but ultimately easier and more thorough.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.5547, -0.5990, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.6737, -0.7139, -0.5864, -0.6266, -0.6667,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.1825, 8.0358, 7.8923, 8.0498, 8.2054, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.0068, 9.1455, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.2600, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 10.9637, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Take a remarkable statistic that Shesol cites but lets pass relatively unexamined.\nHypothesis: They had data that was very relevant but under used.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.3112, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.1255, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.7468, -0.5717, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, 0.0000,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.0201, 7.2222, 7.4194, 7.6120, 7.3786, 7.1550, 6.9402,\n 6.7338, 6.5350, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.3281, 8.1742, 8.3324,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.3859, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.3459, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.2069, 11.3228,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.2221, 12.3299, 12.4370, 12.5434, 12.4383,\n 12.3343, 12.4405, 12.3377, 12.2360, 12.3419, 12.4471, 12.3468, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 12.8719,\n 12.7755, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.7638, 13.8593, 13.9543, 14.0488,\n 14.1428, 14.0503, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.7874, 14.6976, 14.7877, 14.8773, 14.7885,\n 14.8779, 14.7898, 14.8789, 14.7916, 14.7049, 14.7939, 14.7079, 14.6225,\n 14.7113, 14.7998, 14.8878, 14.8034, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.2387, 15.3247, 15.2414, 15.3272, 15.4126, 15.4976, 15.5823, 15.5000,\n 15.4182, 15.5028, 15.4217, 15.5060, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: It might have seemed like manna from heaven - up to thousands of dollars dropping, often unexpectedly, into the hands of a half-million Kentucky and Indiana residents this month.\nHypothesis: A recent law proposed by congress has resulted in the seizure of thousands of dollars from Kentucky residents.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.4%", + "z-score": "-1.15", + "p value": "0.876", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.2323, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.8086, -1.6641, -1.7049, -1.7454, -1.7857, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.7213, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.2435, -1.2817, -1.1547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 2.8868,\n 3.2206, 2.9938, 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.3211, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.4501, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.3901, 7.2960, 7.2029, 7.3386, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.5607, 8.6783, 8.5964, 8.5153,\n 8.4348, 8.5516, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.2118, 9.3212, 9.4299, 9.5381, 9.4619, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.5577, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.7574, 9.8611, 9.9642, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.0547, 10.1558, 10.0848, 10.1855, 10.1149, 10.2151,\n 10.1450, 10.0753, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.4603, 10.5573, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Get individuals to invest their time and the funding will follow.\nHypothesis: If individuals will invest their time, funding will come along, too.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.3797, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "199", + "Fraction of T in Greenlist": "100.0%", + "z-score": "24.4", + "p value": "3.76e-132", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 9.1652, 9.3274, 9.4868, 9.6437, 9.7980,\n 9.9499, 10.0995, 10.2470, 10.3923, 10.5357, 10.6771, 10.8167, 10.9545,\n 11.0905, 11.2250, 11.3578, 11.4891, 11.6189, 11.7473, 11.8743, 12.0000,\n 12.1244, 12.2474, 12.3693, 12.4900, 12.6095, 12.7279, 12.8452, 12.9615,\n 13.0767, 13.1909, 13.3041, 13.4164, 13.5277, 13.6382, 13.7477, 13.8564,\n 13.9642, 14.0712, 14.1774, 14.2829, 14.3875, 14.4914, 14.5945, 14.6969,\n 14.7986, 14.8997, 15.0000, 15.0997, 15.1987, 15.2971, 15.3948, 15.4919,\n 15.5885, 15.6844, 15.7797, 15.8745, 15.9687, 16.0624, 16.1555, 16.2481,\n 16.3401, 16.4317, 16.5227, 16.6132, 16.7033, 16.7929, 16.8819, 16.9706,\n 17.0587, 17.1464, 17.2337, 17.3205, 17.4069, 17.4929, 17.5784, 17.6635,\n 17.7482, 17.8326, 17.9165, 18.0000, 18.0831, 18.1659, 18.2483, 18.3303,\n 18.4120, 18.4932, 18.5742, 18.6548, 18.7350, 18.8149, 18.8944, 18.9737,\n 19.0526, 19.1311, 19.2094, 19.2873, 19.3649, 19.4422, 19.5192, 19.5959,\n 19.6723, 19.7484, 19.8242, 19.8997, 19.9750, 20.0499, 20.1246, 20.1990,\n 20.2731, 20.3470, 20.4206, 20.4939, 20.5670, 20.6398, 20.7123, 20.7846,\n 20.8567, 20.9284, 21.0000, 21.0713, 21.1424, 21.2132, 21.2838, 21.3542,\n 21.4243, 21.4942, 21.5639, 21.6333, 21.7025, 21.7715, 21.8403, 21.9089,\n 21.9773, 22.0454, 22.1133, 22.1811, 22.2486, 22.3159, 22.3830, 22.4499,\n 22.5167, 22.5832, 22.6495, 22.7156, 22.7816, 22.8473, 22.9129, 22.9783,\n 23.0434, 23.1084, 23.1733, 23.2379, 23.3024, 23.3666, 23.4307, 23.4947,\n 23.5584, 23.6220, 23.6854, 23.7487, 23.8118, 23.8747, 23.9374, 24.0000,\n 24.0624, 24.1247, 24.1868, 24.2487, 24.3105, 24.3721, 24.4336])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: i often think gee i wish i had a video camera because i could sure use ten thousand dollars so but i like things like Evening Shade with Burt Reynolds i really enjoy that and uh\nHypothesis: Video cameras that I need are very expensive.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "26.0%", + "z-score": "0.203", + "p value": "0.42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "197", + "Fraction of T in Greenlist": "99.0%", + "z-score": "24.1", + "p value": "1.08e-128", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 5.3333, 5.6045, 5.8635, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 8.2923, 8.4697, 8.6436, 8.8141, 8.9815,\n 9.1458, 9.3074, 9.4662, 9.6225, 9.7763, 9.9278, 10.0771, 10.2242,\n 10.3692, 10.5123, 10.6535, 10.7928, 10.9304, 11.0663, 11.2006, 11.3333,\n 11.4645, 11.5943, 11.7226, 11.8495, 11.9751, 12.0994, 12.2224, 12.3443,\n 12.4649, 12.5844, 12.7028, 12.8201, 12.9364, 13.0516, 13.1658, 13.2791,\n 13.3913, 13.5027, 13.6132, 13.7227, 13.8315, 13.9393, 14.0464, 14.1526,\n 14.2581, 14.3627, 14.4667, 14.5699, 14.6723, 14.7741, 14.8751, 14.9755,\n 15.0753, 15.1743, 15.2728, 15.3706, 15.4677, 15.5643, 15.6603, 15.7557,\n 15.8505, 15.9448, 16.0385, 16.1317, 16.2243, 16.3165, 16.4081, 16.4992,\n 16.5898, 16.6799, 16.7695, 16.8586, 16.9473, 17.0355, 17.1233, 17.2106,\n 17.2975, 17.3839, 17.4700, 17.5556, 17.6407, 17.7255, 17.8099, 17.8939,\n 17.9775, 18.0607, 18.1435, 18.2259, 18.3080, 18.3897, 18.4710, 18.5520,\n 18.6327, 18.7130, 18.7929, 18.8725, 18.9518, 19.0307, 19.1094, 19.1877,\n 19.2657, 19.3433, 19.4207, 19.4977, 19.5745, 19.6509, 19.7271, 19.8030,\n 19.8785, 19.9538, 20.0288, 20.1035, 20.1780, 20.2522, 20.3261, 20.3997,\n 20.4731, 20.5462, 20.6190, 20.6916, 20.7640, 20.8361, 20.9079, 20.9795,\n 21.0509, 21.1220, 21.1929, 21.2635, 21.3339, 21.4041, 21.4740, 21.5438,\n 21.6132, 21.6825, 21.7516, 21.8204, 21.8890, 21.9574, 22.0256, 22.0936,\n 22.1614, 22.2289, 22.2963, 22.3635, 22.4304, 22.4972, 22.5637, 22.6301,\n 22.6963, 22.7622, 22.8280, 22.8936, 22.9590, 23.0243, 23.0893, 23.1542,\n 23.2189, 23.2834, 23.3477, 23.4118, 23.4758, 23.5396, 23.6032, 23.6667,\n 23.7300, 23.7931, 23.8560, 23.9188, 23.9814, 24.0439, 24.1062])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The man on the ground thinks for a moment and yells back, You must work in management.\nHypothesis: There was no one on the ground, man or woman.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.3849, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.6768, 0.8296, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.5879, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "69.2%", + "z-score": "14.4", + "p value": "4.57e-47", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.2348, 10.1405, 10.0472, 9.9547, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 11.8427, 11.9455, 12.0476, 11.9594, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.3002, 13.2182, 13.3113, 13.2299,\n 13.3227, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: If, however, the evaluation question requires GAO to report on how satisfactory progress is or the reasons for problems in implementation, the more staff who can be on site over time, with the ricHe'st or thickest base for examining the situation as the many people involved see it, the sounder our causal conclusions and subsequent recommendations will be.\nHypothesis: If the GAO has to report on the progress, the recommendations will be much poorer quality.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.6768, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.4845, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.5740, 0.5283, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.7442, 9.8716, 9.9980, 9.8852, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.2833, 10.4042, 10.5243, 10.4169,\n 10.5363, 10.6547, 10.5490, 10.6667, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.6584, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.6971, 11.8018, 11.7108, 11.6206, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.2668, 13.1878, 13.1094,\n 13.0314, 13.1233, 13.0460, 12.9691, 12.8928, 12.9845, 12.9087, 12.8333,\n 12.9247, 13.0157, 12.9410, 12.8667, 12.7928, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The economy could be still better.\nHypothesis: The economy has never been better.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.2304, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.6202, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.5", + "p value": "2.35e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.0183, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.2789, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.4411, 11.3610, 11.4614, 11.3820, 11.4819, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.3888, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Hills and mountains are especially sanctified in the cult of Jainism.\nHypothesis: The cult of Jainism hates nature.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.5311, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.0473, -0.0943, -0.1410, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.8772, 3.7417, 3.9620, 4.1779, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.3211, 5.4909, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.6412, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.7200, 11.8176, 11.9147, 12.0114,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The famous tenements (or lands ) began to be built.\nHypothesis: The land remained deserted.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.7778, -1.6496, -1.6865, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.1471, 6.0474, 6.1996, 6.1012, 6.0041, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.7414, 6.8819, 6.7890, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.7536, 6.8889, 7.0231, 6.9361, 7.0692, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.2760, 8.3927, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.6556, 8.5796, 8.6921, 8.6166,\n 8.7284, 8.6535, 8.7647, 8.6903, 8.8008, 8.7270, 8.6537, 8.5810,\n 8.6908, 8.6186, 8.5469, 8.4757, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.9447, 9.0510, 9.1567, 9.0863, 9.1915, 9.1215, 9.0520,\n 9.1566, 9.2607, 9.1916, 9.2952, 9.2265, 9.1584, 9.0906, 9.1936,\n 9.2960, 9.2287, 9.3306, 9.2637, 9.3651, 9.4661, 9.5666, 9.6667,\n 9.7663, 9.6996, 9.7987, 9.8974, 9.8311, 9.7653, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In the midst of this amazing amalgam of cultures is a passion for continuity.\nHypothesis: A passion for continuity is not the most important of these cultures.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.5547, 0.5069, 0.4593, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.4884, 8.3391, 8.4936, 8.6461, 8.7967, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.9544, 9.0990, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.9570, 9.8367, 9.9656, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.1124, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.3812, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.2623, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.9650, 11.8704, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.5615, 12.6601, 12.7581, 12.8556, 12.7690, 12.8661, 12.7802,\n 12.8769, 12.9732, 13.0690, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.1957, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.5985, 13.6896, 13.6091, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.8113, 13.9007, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Putting aside stage as a horse-drawn conveyance, a popular delicatessen, a part of a rocket, and an opportunity to mock Gail Sheehy (who seems to get a free ride from News Quiz participants), this question all but demanded the invention of a violent theatrical event, and that's not easy.\nHypothesis: Gail Sheehy is a popular target for mocking on other shows.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.4229, 0.5620,\n 0.5134, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.4327, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.3257, 10.4444, 10.5623, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.2615, 12.3655, 12.4689, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 12.9935, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.2093, 13.3059, 13.2166, 13.3128, 13.4086, 13.5039, 13.5987, 13.5105,\n 13.6050, 13.5176, 13.6117, 13.7054, 13.6188, 13.7122, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Entreaties to the apartment's owner have gone nowhere.\nHypothesis: The apartment's owner is very responsive.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.5181, -1.5759, -1.3608,\n -1.1488, -1.2081, -1.0000, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.1499, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.4376, -1.2943, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.2585, -1.2982, -1.1651, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -1.1279, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.2168, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.2815, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.3641, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 9.9601, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.2514, 10.3630, 10.2743, 10.3853, 10.2975,\n 10.2106, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.7143, 10.8204, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.0004, 11.1018, 11.0235, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 10.8421, 10.7671, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.6179, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Mrs. Cavendish is in her mother-in-law's room. \nHypothesis: Mrs. Cavendish has left the building.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "22.2%", + "z-score": "-0.509", + "p value": "0.695", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.3791, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.0254, 11.1392, 11.0368, 11.1500, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.0096, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.6601, 12.5732, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.6234, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.3002, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.4263, 13.5179, 13.6091, 13.5292, 13.6201,\n 13.5408, 13.4620, 13.3838, 13.3060, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But I thought you'd sworn off coffee.\nHypothesis: I thought that you vowed to drink more coffee.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.4930, 2.6914, 2.8868,\n 2.7928, 2.9848, 2.8919, 2.8006, 2.7107, 2.6222, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.4077, 2.5743, 2.7393, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.5403, 2.7001, 2.6296, 2.7875, 2.7175,\n 2.6481, 2.8039, 2.9582, 2.8889, 2.8203, 2.9726, 3.1236, 3.2733,\n 3.2044, 3.3526, 3.2841, 3.2163, 3.1492, 3.0827, 3.0168, 2.9515,\n 3.0967, 3.0317, 2.9673, 2.9035, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.6735, 2.8141, 2.7534, 2.6933, 2.8324, 2.7724,\n 2.7129, 2.6540, 2.7913, 2.9277, 3.0632, 3.1977, 3.1382, 3.0792,\n 3.2124, 3.1536, 3.0952, 3.0373, 3.1690, 3.1113, 3.2419, 3.1844,\n 3.1273, 3.2567, 3.3853, 3.3282, 3.2715, 3.3989, 3.5256, 3.6515,\n 3.5946, 3.7196, 3.6629, 3.6067, 3.5508, 3.4953, 3.4401, 3.3853,\n 3.5085, 3.4539, 3.3996, 3.3457, 3.4677, 3.4140, 3.3606, 3.3075,\n 3.2547, 3.2023, 3.1502, 3.2705, 3.2186, 3.1669, 3.2863, 3.2348,\n 3.1836, 3.1327, 3.2509, 3.3686, 3.4857, 3.6021, 3.5509, 3.5000,\n 3.6156, 3.5648, 3.5143, 3.4641, 3.5787, 3.5286, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.0212, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.6029, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 11.8579, 11.7787, 11.8766, 11.9741, 11.8956, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.9534, 12.0493, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.3523, 12.4460, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.5000,\n 12.5923, 12.6841, 12.6102, 12.5367, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: We stink all the time.\nHypothesis: We always stink.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.8%", + "z-score": "4.78", + "p value": "8.6e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.8489, 2.0889, 2.3238, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 1.9215, 2.1412, 2.0428, 1.9462, 2.1602,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.4930, 2.6914, 2.8868,\n 2.7928, 2.9848, 2.8919, 2.8006, 2.7107, 2.6222, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.5660, 2.7456, 2.6632, 2.5820,\n 2.5019, 2.4228, 2.5983, 2.5198, 2.4423, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.5560, 2.4814, 2.6485, 2.5743, 2.5011, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.8433, 3.0022, 3.1597, 3.0870, 3.2426, 3.1704,\n 3.0989, 3.0282, 2.9582, 2.8889, 3.0415, 2.9726, 2.9044, 3.0551,\n 3.2044, 3.3526, 3.2841, 3.2163, 3.1492, 3.2953, 3.4402, 3.5839,\n 3.5166, 3.6590, 3.5920, 3.5256, 3.4599, 3.6004, 3.7399, 3.8784,\n 3.8125, 3.7471, 3.8841, 3.8191, 3.9549, 3.8903, 3.8262, 3.7626,\n 3.6995, 3.8335, 3.7707, 3.7084, 3.8411, 3.7791, 3.7176, 3.6566,\n 3.7878, 3.7270, 3.8571, 3.7966, 3.7366, 3.8655, 3.8057, 3.9337,\n 3.8741, 4.0011, 4.1273, 4.2527, 4.1931, 4.3176, 4.2582, 4.1992,\n 4.1406, 4.0825, 4.0247, 4.1477, 4.0901, 4.0330, 4.1549, 4.2762,\n 4.3967, 4.3395, 4.2827, 4.2262, 4.3456, 4.4644, 4.5826, 4.5260,\n 4.6434, 4.5871, 4.5311, 4.4754, 4.5918, 4.7076, 4.8227, 4.7670,\n 4.7117, 4.8260, 4.7709, 4.8845, 4.8295, 4.7749, 4.7206, 4.6667,\n 4.7792, 4.7255, 4.6720, 4.7838])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "193", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "69.9%", + "z-score": "14.4", + "p value": "1.91e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 11.0254, 10.9229, 10.8215, 10.9355, 10.8353, 10.7362, 10.8498, 10.9626,\n 11.0746, 10.9769, 11.0883, 11.1991, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.4574, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.2503, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 13.9515, 13.8707, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.1573, 14.2455, 14.3333,\n 14.4208])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In an increasingly interdependent world, many pressing problems that affect Americans can be addressed only through cooperation with other countries.\nHypothesis: We should be independent and stay away from talking and working with other nations. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.3536, -1.2096, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.6885, -0.5579, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "138", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "78.3%", + "z-score": "14.4", + "p value": "1.27e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.3271, 7.5144, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 8.6828, 8.8426, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 9.7119, 9.8553, 9.9969, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 11.2022, 11.3293, 11.1791, 11.0315, 11.1588,\n 11.2848, 11.4097, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.7395, 11.8589, 11.7239, 11.8429, 11.7104, 11.5799, 11.6988, 11.8168,\n 11.6890, 11.8065, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.4922,\n 12.6035, 12.7140, 12.5916, 12.4708, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.7900, 12.8978, 13.0048, 12.8889, 12.9955, 13.1015, 13.2067, 13.3113,\n 13.1979, 13.3022, 13.4057, 13.5086, 13.6109, 13.4999, 13.6019, 13.7032,\n 13.5940, 13.6950, 13.5871, 13.4804, 13.5813, 13.6816, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.4562,\n 14.3540, 14.4493])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Ca'daan continued.\nHypothesis: Ca'daan refused to stop.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.8980, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.5507, 0.6983, 0.6460, 0.5941,\n 0.7399, 0.8847, 0.8325, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.7878, 0.7419, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.8607, 0.9870, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.5137, 6.4065, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 6.8931,\n 6.7931, 6.6944, 6.8391, 6.9824, 6.8849, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.5258, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.6867, 7.5967, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.6238, 8.5381, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 8.9940, 8.9113,\n 9.0267, 8.9448, 9.0595, 8.9783, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.9660, 8.8874, 8.8095, 8.7323, 8.8448, 8.9567, 8.8800, 8.9912,\n 8.9151, 8.8396, 8.9502, 9.0601, 9.1694, 9.0944, 9.0200, 8.9461,\n 9.0548, 9.1629, 9.2704, 9.3774, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.6850, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.4563, 10.3853,\n 10.3148, 10.2447, 10.1750, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The rule requires broadcasters to maintain a file for public inspection containing a Children's Television Programming Report and to identify programs specifically designed to educate and inform children at the beginning of those programs and to furnish such information to the publishers of program guides.\nHypothesis: The rule makes broadcasters keep a file about children's television programming.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.5706, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.5991, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.7382, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.1266, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.2413, 8.3605, 8.4788, 8.3977, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.8874, 9.0000, 8.9221, 9.0340, 9.1452, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.3212, 9.4299, 9.5381, 9.4619, 9.5695, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.6214,\n 9.7261, 9.6532, 9.5808, 9.6850, 9.7886, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.2565, 10.3566, 10.2856, 10.3853,\n 10.3148, 10.2447, 10.3439, 10.2743, 10.3730, 10.3038, 10.2350, 10.1667,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well because how how hot i mean like like in the coldest that it gets in winter down there how much is it\nHypothesis: It's hot all the time where I live, including winter.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.4495, 2.6679, 2.8823, 2.7757, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.8808, 3.0817, 2.9814, 2.8830, 2.7863, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.5820,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.4423, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.5560, 2.7235, 2.8893, 2.8138, 2.7393, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.5403, 2.4703, 2.6296, 2.7875, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.6667, 2.5991, 2.5322, 2.6852, 2.8368,\n 2.9872, 2.9200, 2.8534, 2.7875, 2.7222, 2.6575, 2.5934, 2.7406,\n 2.6768, 2.8226, 2.9673, 3.1109, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.8753, 3.0151, 3.1539, 3.0923, 3.0311, 2.9704,\n 2.9103, 2.8505, 2.7913, 2.9277, 2.8687, 2.8101, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.2857, 3.4170, 3.3582, 3.2998, 3.2419, 3.1844,\n 3.1273, 3.0706, 3.0143, 3.1433, 3.0872, 3.0315, 3.1593, 3.2863,\n 3.4126, 3.3567, 3.3012, 3.4263, 3.3710, 3.3160, 3.4401, 3.5635,\n 3.6862, 3.8081, 3.7528, 3.8740, 3.8189, 3.7641, 3.7097, 3.8297,\n 3.9491, 3.8947, 3.8406, 3.7869, 3.7335, 3.6805, 3.6277, 3.5753,\n 3.6929, 3.6407, 3.5887, 3.5370, 3.6537, 3.7697, 3.7180, 3.6667,\n 3.6156, 3.5648, 3.6797, 3.6291, 3.7432, 3.8569, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 10.1187, 9.9653, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.7575, 10.8866,\n 10.7442, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.4829, 11.6039, 11.4704, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 12.1651, 12.2794, 12.1533, 12.0289, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.9196, 12.7998, 12.6815,\n 12.7900, 12.8978, 13.0048, 13.1111, 13.2167, 13.3217, 13.4259, 13.5295,\n 13.4152, 13.3022, 13.4057, 13.5086, 13.6109, 13.7125, 13.8136, 13.9140,\n 14.0139, 14.1131, 14.0036, 13.8952, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.5682, 14.4639, 14.5593, 14.6542,\n 14.7486, 14.8425, 14.9359, 15.0289, 15.1213, 15.2134, 15.1118, 15.0111,\n 15.1031, 15.1946, 15.2857, 15.3764, 15.4666, 15.5563, 15.6457, 15.7346,\n 15.6365, 15.5391, 15.6280, 15.7165, 15.8046, 15.8923, 15.9796, 16.0665,\n 16.1531, 16.2392, 16.1441, 16.0497, 16.1358, 16.2216, 16.3070, 16.3920,\n 16.4767, 16.5610, 16.6450, 16.7286, 16.6363, 16.5446, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 17.0411, 17.1227, 17.2040, 17.1143, 17.0251,\n 17.1064, 17.1873, 17.2680, 17.3483, 17.4284, 17.5081, 17.5875, 17.6667,\n 17.5793, 17.4925, 17.5716, 17.6504, 17.7290, 17.8072, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Each of the men wore leather armor and dressed in the style of heavy riders.\nHypothesis: The men were naked.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -0.8577, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.6737, -0.7139, -0.7539, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "74.0%", + "z-score": "15.8", + "p value": "8.8e-57", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 9.8187, 9.9495, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.3333, 11.2259, 11.3399, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.6635, 12.5657, 12.6684, 12.7704, 12.6739,\n 12.7755, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.3737, 13.2791,\n 13.1852, 13.2834, 13.3810, 13.4780, 13.5746, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.9585, 13.8675, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.4292, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 15.0585, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.4071, 15.3226,\n 15.4085, 15.4940, 15.5792, 15.6640, 15.7485, 15.6651, 15.5823, 15.6667,\n 15.7507, 15.6686, 15.7524, 15.8359])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The man should have died instantly.\nHypothesis: The man was perfectly fine. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.6484, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.4300, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 4.9640, 4.6291, 4.9193, 4.6188, 4.3409, 4.6268,\n 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140, 4.5033, 4.7556, 4.5556,\n 4.3644, 4.6101, 4.4272, 4.2515, 4.4907, 4.7237, 4.9507, 4.7819, 4.6188,\n 4.8407, 4.6829, 4.5301, 4.7469, 4.5985, 4.8107, 5.0186, 4.8742, 4.7336,\n 4.9373, 5.1371, 5.3333, 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.6009,\n 5.7735, 5.6573, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 6.0928, 5.9874, 6.1450, 6.0410, 6.1968, 6.0943,\n 5.9932, 6.1471, 6.0474, 6.1996, 6.1012, 6.2517, 6.4008, 6.5483, 6.6944,\n 6.5970, 6.5008, 6.4059, 6.3122, 6.4566, 6.3640, 6.2725, 6.4153, 6.3248,\n 6.2354, 6.1470, 6.0596, 6.2008, 6.1143, 6.2541, 6.1685, 6.3070, 6.2222,\n 6.3595, 6.2755, 6.4116, 6.3283, 6.4632, 6.5970, 6.7298, 6.8615, 6.7788,\n 6.6968, 6.6157, 6.5354, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.6865,\n 6.8138, 6.9402, 7.0658, 7.1904, 7.3143, 7.4373, 7.5595, 7.6808, 7.8014,\n 7.9212, 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.2365, 8.3525, 8.2754,\n 8.1988, 8.3140, 8.4286, 8.3526, 8.2772, 8.2024, 8.3162, 8.2420, 8.1683,\n 8.2813, 8.2082, 8.3205, 8.4322, 8.3595, 8.2874, 8.3984, 8.3268, 8.2557,\n 8.3660, 8.2954, 8.4050, 8.3349, 8.2652, 8.3742, 8.4826, 8.5905, 8.6978,\n 8.6284, 8.5595, 8.4911, 8.4232, 8.5298, 8.6359, 8.5683, 8.5012, 8.4345,\n 8.3683, 8.3024, 8.4078, 8.3423, 8.2773, 8.3820, 8.4862, 8.5899, 8.6932,\n 8.6284, 8.5640, 8.5000, 8.4364, 8.5390, 8.6411, 8.7427, 8.8439, 8.9446,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In summer the rice forms a green velvety blanket, then turns golden in autumn when it ripens and is harvested.\nHypothesis: The rice is golden and harvestable in the summer, but turns green in autumn.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.8240, 1.7285, 1.9462, 2.1602,\n 2.0647, 1.9711, 2.1798, 2.3851, 2.2916, 2.1997, 2.1094, 2.3094,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.6524, 1.8257, 1.7552, 1.9262, 2.0954, 2.0247, 1.9548, 2.1213,\n 2.0517, 1.9829, 2.1470, 2.0785, 2.0107, 1.9437, 1.8773, 2.0381,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.7143, 1.8716, 1.8084, 1.9640,\n 1.9009, 2.0548, 2.2074, 2.1442, 2.0817, 2.2323, 2.1700, 2.1082,\n 2.2569, 2.1954, 2.1344, 2.0739, 2.0140, 2.1602, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.0682, 2.2111, 2.1527, 2.2943, 2.2361, 2.3764,\n 2.5156, 2.4574, 2.3995, 2.5373, 2.4797, 2.4225, 2.5589, 2.5019,\n 2.4453, 2.5802, 2.5238, 2.6576, 2.6014, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.5123, 2.4578, 2.5886, 2.5343, 2.6640, 2.7930, 2.7386,\n 2.6846, 2.8124, 2.7585, 2.7050, 2.6519, 2.7783, 2.9040, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.8174, 2.9410, 2.8887, 2.8368, 2.7852,\n 2.9076, 2.8561, 2.8050, 2.7541, 2.7036, 2.6534, 2.7741, 2.7240,\n 2.8440, 2.7940, 2.9132, 3.0317, 2.9817, 2.9320, 3.0496, 3.0000,\n 2.9507, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "56.7%", + "z-score": "10.2", + "p value": "1.02e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.3485, 7.2532, 7.3901, 7.5258, 7.4316, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.5556, 7.6867, 7.5967, 7.5076, 7.6376,\n 7.7667, 7.6785, 7.8065, 7.9336, 8.0598, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.5607, 8.4788, 8.3977, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.5088, 8.6241, 8.7388, 8.6603,\n 8.5824, 8.6963, 8.6190, 8.5424, 8.6556, 8.7681, 8.6921, 8.8039,\n 8.9151, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.7688, 9.6948, 9.7996,\n 9.7261, 9.6532, 9.5808, 9.5089, 9.4375, 9.5416, 9.6452, 9.5743,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.7043,\n 9.6356, 9.7367, 9.8373, 9.7690, 9.8691, 9.9687, 10.0679, 10.0000,\n 10.0987, 10.1970])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Just as the Daily Worker and New Masses , socialist papers from the 1930s, were peppered with citations of Marx and Engels, Educational Liberator is peppered with references to their libertarian equivalents--Friedrich Hayek and Ludwig von Mises.\nHypothesis: Citations from Marx were used in socialist papers in the 30s. \nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "191", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "29.3%", + "z-score": "1.38", + "p value": "0.084", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.9258, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.3786])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.5304, 9.4000, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 9.9656, 9.8473,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.4932, 10.6145, 10.5027, 10.3923, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.5930, 12.6939, 12.6012, 12.5093,\n 12.6099, 12.5188, 12.4286, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.5615, 12.4746, 12.3883, 12.3027, 12.4015, 12.4998, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.3710, 13.2927, 13.3838, 13.4744, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.6025, 13.6914, 13.7801, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: To be sure that any needlework item is the genuine article (as opposed to an inferior import or machine-made piece), look for a lead seal with an M, the emblem of IBTAM meaning it's been certified by the Instituto de Bordado, Tapecaras e Arte?\u00adsanato da Madeira (Institute of Madeiran Embroidery, Tapestry, and Handicrafts), an official island organization that has a showroom/museum on Rua Visconde de Anadia, 44.\nHypothesis: There is a seal to show authenticity in needlework items made in Italy.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172, -1.9545, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.3912, -2.2517, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.4072, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 15.1690, 15.2619, 15.1556, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.7680, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.3700, 16.4561, 16.3575, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 16.9265,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.4770, 17.3854, 17.4660, 17.5464, 17.6264, 17.7061,\n 17.7856, 17.8647, 17.9435, 18.0221, 18.1003, 18.0107, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: I'm not into this Multivista stuff, because what is some dumpy system when compared to a beautiful hyperextension of the sun somewhere over Kuchara, when compared to the golden hue of onion fried with the kse-fi waves, when compared to the number of dividers for credit membranes in a wallet of a rich man, when compared to the magnificent smell of a briessante roll dunked in wholesome milk synthetically enriched with substances boosting the secretion of happiness hormones, that one from two years ago, not three,' Gonzo said in a tone characteristic for a man who just discovered a solution to his life problem.\nHypothesis: He's not into this multivista stuff because what is some dumpy system compared to the magnificent smell of a cinnamon roll dunked in wholesome milk.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "17.8%", + "z-score": "-1.42", + "p value": "0.922", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.5361, -1.6013, -1.6654, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.3608,\n -1.4190])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "166", + "Fraction of T in Greenlist": "83.4%", + "z-score": "19", + "p value": "4.71e-81", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.4540, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.6894, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.6529, 12.7567, 12.8598,\n 12.9624, 13.0643, 13.1657, 13.2665, 13.3667, 13.4664, 13.5655, 13.6640,\n 13.7621, 13.8595, 13.9565, 14.0530, 14.1489, 14.2443, 14.3393, 14.4338,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.8990, 14.9907, 15.0819, 15.1727,\n 15.2631, 15.3530, 15.4425, 15.5316, 15.6203, 15.7086, 15.7965, 15.8840,\n 15.9711, 16.0578, 16.1441, 16.2301, 16.3156, 16.4009, 16.4857, 16.5702,\n 16.6543, 16.7381, 16.8216, 16.9047, 16.9874, 17.0698, 17.1519, 17.2337,\n 17.3151, 17.3962, 17.4770, 17.5575, 17.6377, 17.7176, 17.7971, 17.8764,\n 17.9554, 18.0340, 18.1124, 18.1905, 18.2683, 18.3458, 18.4230, 18.5000,\n 18.5767, 18.6531, 18.7292, 18.8051, 18.8807, 18.9561, 19.0312])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: No matter what I ended up doing, I think I would have found some way within that profession to do public work, because that's what I was taught, Zelon said. \nHypothesis: This is like using the postition as a banker to help make regulations better for the public.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 1.0465, 0.9979, 1.1305, 1.0820, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.3131, 1.4402, 1.3926,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.6737, 1.7974, 1.7498, 1.8728,\n 1.9950, 2.1167, 2.0688, 2.1896, 2.3098, 2.2618, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.3567, 2.4744, 2.4269, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.7%", + "z-score": "13.9", + "p value": "4.93e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 9.8601, 9.7574, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.2348, 10.3496, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.3191, 10.2287, 10.3409, 10.4524, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 11.9197, 11.8393, 11.9377,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.2288, 13.3196, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Clark also expressed the hope that he and Redgrave could continue with their marriage.\nHypothesis: Clark hoped that he could continue their marriage.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.5636, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.4540, 0.6030, 0.5507, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.9623,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.2257, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.3443, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.6336, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.6737, 1.6262, 1.5791, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.0698, 10.1840, 10.0926, 10.0021,\n 9.9124, 10.0261, 9.9373, 9.8494, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.9906, 10.9091, 10.8282, 10.9317, 11.0346, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.2214, 12.3163, 12.2397, 12.1635, 12.0878,\n 12.0127, 11.9380, 12.0327, 12.1270, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.2598, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The poverty, for instance, does not create the sense of shame as it does for people who live in Western countries.\nHypothesis: Poverty doesn't create a sense of shame in any country.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.5262, -0.2981, -0.3696, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.7625, 0.9115, 0.8575, 1.0050, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.1016, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.3411, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.7533, 11.8594, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.3985, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.8804, 14.9689, 14.8825, 14.9707,\n 15.0585, 14.9729, 15.0605, 15.1477, 15.2345, 15.1498, 15.0657, 15.1524,\n 15.2387, 15.1553, 15.0726, 15.1587, 15.2446, 15.3301, 15.4152, 15.3333,\n 15.4182, 15.5028, 15.4217, 15.5060, 15.5900, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Back on the road to Jaisalmer, one last splash of color delights the senses before you plunge into the the fields are dotted with mounds of red hot chili peppers.\nHypothesis: The road to Jaisalmer is bumpy and unpleasant to ride on. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.6484, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.4300, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.2418, 9.3831, 9.5229, 9.6612, 9.5258,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.6960, 9.5751, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.9863, 10.1124, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 10.9870, 11.1033, 11.2187, 11.1111, 11.2259, 11.1197, 11.2339, 11.1291,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.5157, 11.4184, 11.3222, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.0554, 11.1640, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.3721, 11.4762, 11.3899, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.4638, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 11.8210, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.7787, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.5221, 12.4460, 12.5394, 12.6323, 12.5568, 12.6494, 12.7416, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: they supplied some uh you know they were some groups uh the Vicksburg if you ever get a chance to go over to Vicksburg the battleground at Vicksburg uh there's an area there where there was uh some Texas uh groups and they had an interesting time there\nHypothesis: There were also North Carolina troops at Vicksburg.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.3954, 1.3333, 1.4931, 1.4313, 1.5892, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.6081, 1.7522, 1.8953, 1.8385, 1.9803,\n 1.9237, 2.0642, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 2.1685, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.0868, 2.0339, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.3206, 2.2680, 2.3967, 2.3443, 2.4721, 2.5990, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.5604, 2.6830, 2.6323, 2.5820, 2.5319, 2.4822, 2.4327, 2.5538,\n 2.5044, 2.4553, 2.5754, 2.6949, 2.6458, 2.7644, 2.7154, 2.8333,\n 2.9507, 2.9016, 2.8528, 2.9692, 2.9205, 2.8721, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.7%", + "z-score": "13.5", + "p value": "4.54e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.2623, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.5271, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.2040, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.5542, 12.6504,\n 12.5685, 12.4872, 12.4065, 12.3263, 12.4223, 12.5179, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: which i mean i think it should be anyway\nHypothesis: I don't think it should be that way\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.19", + "p value": "0.575", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.3497, 11.4574, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.4286, 12.5289, 12.6287, 12.7279, 12.6387, 12.5503,\n 12.6492, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.3762, 14.4651, 14.5535, 14.6416,\n 14.7293, 14.6473, 14.5659, 14.4850, 14.5726, 14.4923, 14.4126, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.7673, 14.8530, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Also, compensation committees need to understand the implications of compensation to provide incentives for management to do the right thing for the company and its shareholders versus themselves.\nHypothesis: The implications of compensation to provide incentives for management to do the right thing for the company and its shareholders versus themselves should always be ignored by compensation committees.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "35.7%", + "z-score": "3.48", + "p value": "0.000252", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 2.0135, 1.9333, 1.8543, 1.7765, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 2.0913, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.8173, 1.7496, 1.9149, 2.0785, 2.2405, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.3552, 2.5117, 2.4444, 2.3779, 2.5322, 2.4660, 2.4004,\n 2.5527, 2.4874, 2.6381, 2.7875, 2.9357, 2.8701, 2.8051, 2.9515,\n 2.8868, 2.8226, 2.7591, 2.6961, 2.6336, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.2943, 2.2361, 2.1783,\n 2.1210, 2.0642, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.3891, 2.5238, 2.4678, 2.4122, 2.5456, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.6640, 2.6099, 2.5560,\n 2.5026, 2.6309, 2.7585, 2.7050, 2.6519, 2.7783, 2.7253, 2.6726,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.5898, 2.7137, 2.6623, 2.7852,\n 2.9076, 3.0292, 2.9776, 2.9263, 2.8752, 2.9957, 3.1156, 3.2348,\n 3.1836, 3.1327, 3.2509, 3.2002, 3.1497, 3.2671, 3.2167, 3.3333,\n 3.4494, 3.5648, 3.5143, 3.4641, 3.5787, 3.5286, 3.4788])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.2283, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.4932, 10.3812, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 10.8699,\n 10.9870, 10.8790, 10.7722, 10.8889, 11.0047, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.1392, 11.2522, 11.1500, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.0883, 10.9917, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.1480, 11.2564, 11.3642, 11.2719, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.7543, 11.8571, 11.7696, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.3463, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 13.9332, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The fourth-century Roman emperor Gratianus was an early visitor, followed much later by such luminaries as the Romantic poet Alphonse de Lamartine, Queen Victoria, Saint-Sa?\u00abns, and Rachmaninov.\nHypothesis: Gratianus was a Roman empoeror.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -1.8058, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.7924, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.7433, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.7688, 0.9017, 0.8540, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.7336, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.6299, 0.7539, 0.7102, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.8248, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.0379, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.3901, 7.5258, 7.6603, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 7.8065, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.4423, 8.5607, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.7831, 8.8978, 8.8179, 8.9319, 9.0452,\n 8.9660, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.1018, 9.2118, 9.1357, 9.2450, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.6635, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.8611, 9.7886, 9.7167, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.8821, 9.9837, 10.0848, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.2447, 10.1750, 10.1058, 10.2050, 10.3038, 10.2350, 10.3333,\n 10.4312, 10.3628, 10.2949, 10.3923, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: So, I went to court by myself and told them the truth, but it didn't do me any good.\nHypothesis: I went and told the truth at court but it didn't do me any good.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.6222, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.8040, 0.7509, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 1.0284, 1.1711, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.2257, 1.1746, 1.3112,\n 1.4470, 1.3957, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.3786, 1.5000,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.0358, 8.1929, 8.0498, 8.2054, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.3116, 8.1881, 8.0667, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.5769, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 10.8544, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.2142, 11.3204, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.0756, 10.9898, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.0728, 10.9906, 10.9091, 10.8282, 10.7480, 10.6683, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Click More Links (on the right-hand side under Miscellaneous), and from\nHypothesis: There are no links to click under Miscellaneous.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.6353, 1.5323, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.5681, 1.7179, 1.6591, 1.6008, 1.5430, 1.4857, 1.6330,\n 1.7792, 1.7217, 1.8664, 1.8091, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.7018, 1.8411, 1.7864, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.9263, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.0339, 2.1656, 2.1128, 2.0604, 2.0083,\n 2.1386, 2.0866, 2.0350, 2.1640, 2.1125, 2.2406, 2.1892, 2.3163,\n 2.2650, 2.2140, 2.1634, 2.2892, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.9473, 1.8999, 1.8527, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.9533, 1.9068, 2.0259, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "191", + "Fraction of T in Greenlist": "96.0%", + "z-score": "23.1", + "p value": "1.33e-118", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 7.7232, 7.9048, 7.6980, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.8926, 9.0520, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.4850, 9.6347, 9.7823, 9.9279, 10.0718, 10.2138, 10.3540, 10.4926,\n 10.6296, 10.7650, 10.8989, 11.0313, 11.1622, 11.2918, 11.4201, 11.5470,\n 11.6727, 11.7971, 11.9203, 12.0424, 12.1633, 12.2832, 12.4019, 12.5196,\n 12.6363, 12.7520, 12.8667, 12.9804, 13.0932, 13.2052, 13.3162, 13.4263,\n 13.5357, 13.6441, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.3818, 14.4842, 14.5860, 14.6871, 14.7875, 14.8873, 14.9864, 15.0849,\n 15.1828, 15.2801, 15.3769, 15.4730, 15.5685, 15.6635, 15.7580, 15.8519,\n 15.9452, 16.0381, 16.1304, 16.2222, 16.3135, 16.4044, 16.4947, 16.5846,\n 16.6740, 16.7629, 16.8514, 16.9394, 17.0270, 17.1141, 17.2008, 17.2871,\n 17.3730, 17.4585, 17.5435, 17.6282, 17.7124, 17.7963, 17.8798, 17.9629,\n 18.0457, 18.1280, 18.2100, 18.2917, 18.3730, 18.4539, 18.5345, 18.6148,\n 18.6947, 18.7743, 18.8535, 18.9325, 19.0111, 19.0894, 19.1673, 19.2450,\n 19.3224, 19.3994, 19.4762, 19.5527, 19.6288, 19.7047, 19.7803, 19.8556,\n 19.9307, 20.0054, 20.0799, 20.1541, 20.2281, 20.3017, 20.3752, 20.4483,\n 20.5212, 20.5939, 20.6663, 20.7384, 20.8103, 20.8820, 20.9534, 21.0246,\n 21.0955, 21.1662, 21.2367, 21.3069, 21.3769, 21.4467, 21.5163, 21.5856,\n 21.6548, 21.7237, 21.7924, 21.8608, 21.9291, 21.9972, 22.0650, 22.1327,\n 22.2001, 22.2674, 22.3344, 22.4012, 22.4679, 22.5343, 22.6006, 22.6667,\n 22.7325, 22.7982, 22.8637, 22.9291, 22.9942, 23.0591, 23.1239])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: and so i started watching it and all of a sudden stay tuned next week and i went what\nHypothesis: I wouldn't have started watching it if I'd known.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.2%", + "z-score": "-0.574", + "p value": "0.717", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.4148, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.7216, -0.5927, -0.6333, -0.5053, -0.5459, -0.5864, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.5347, -0.5744])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.9304, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.5966, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.4430, 12.5401, 12.6367, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 13.9113, 13.8333,\n 13.9221, 14.0106, 13.9332, 13.8564, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: no oh no oh well take care\nHypothesis: Bye for now.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 2.9823, 3.1754,\n 3.0793, 2.9848, 2.8919, 3.0806, 2.9887, 3.1743, 3.0833, 2.9938,\n 2.9057, 2.8189, 3.0000, 3.1789, 3.3556, 3.5301, 3.4427, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.8600, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.8523, 4.0112, 3.9302, 4.0872, 4.2426,\n 4.1621, 4.0825, 4.0038, 3.9260, 3.8490, 3.7730, 3.6977, 3.6233,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.1928, 3.3428, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.4308, 3.3627, 3.2953, 3.2285, 3.3731,\n 3.3066, 3.2408, 3.1755, 3.3182, 3.4599, 3.6004, 3.5350, 3.6742,\n 3.6091, 3.7471, 3.6824, 3.6181, 3.5544, 3.4913, 3.6274, 3.7626,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.2301, 4.3605, 4.2970, 4.2339,\n 4.1713, 4.3004, 4.2381, 4.1763, 4.3042, 4.2426, 4.1816, 4.1210,\n 4.2475, 4.1872, 4.1273, 4.0678, 4.0087, 3.9501, 3.8919, 3.8341,\n 3.7766, 3.7196, 3.8438, 3.9673, 4.0901, 4.0330, 3.9762, 3.9198,\n 4.0415, 3.9853, 3.9294, 3.8740, 3.8189, 3.9392, 4.0589, 4.1779,\n 4.2962, 4.2409, 4.1859, 4.3033, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.3721, 4.3180, 4.2642, 4.3792, 4.3256, 4.4399, 4.3864, 4.3333,\n 4.2805, 4.2280, 4.1758, 4.1239, 4.2369, 4.3492, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.6192, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.4184, 11.5271, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.5394, 12.6387, 12.5503,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.6514, 13.5683, 13.6604, 13.5781,\n 13.4963, 13.5881, 13.5069, 13.5985, 13.6896, 13.7803, 13.8707, 13.7904,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.6027, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: 'Hello, Ben.'\nHypothesis: I ignored Ben\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.1866, -2.2404, -2.2937, -2.3462, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.4228, -2.4715, -2.5198, -2.5675, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.6379, -2.6828, -2.7272, -2.7713, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -2.8368,\n -2.8786, -2.9200, -2.9611, -2.7875, -2.8289, -2.8701, -2.9109, -2.9515,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.0227, -3.0619,\n -3.1008, -3.1395, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.1977, -3.2348, -3.2717,\n -3.3083, -3.1536, -3.1905, -3.2271, -3.2636, -3.2998, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.4780, -3.5131, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.5218, -3.5555, -3.5890, -3.6224, -3.4816,\n -3.5151, -3.5485, -3.5817, -3.6148, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.6731, -3.7055, -3.7376, -3.7697, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "72.6%", + "z-score": "9.39", + "p value": "2.92e-21", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165, 1.3472,\n 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868, 2.6605, 2.9938,\n 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712, 3.5796, 3.8497, 4.1111,\n 3.9279, 3.7524, 3.5839, 3.8367, 4.0825, 3.9196, 4.1586, 4.3916, 4.6188,\n 4.8407, 5.0576, 5.2697, 5.1121, 5.3199, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.3333, 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711,\n 7.2400, 7.4066, 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195,\n 8.3716, 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: how can you prove it\nHypothesis: Can you tell me how to prove it?\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.9%", + "z-score": "0.946", + "p value": "0.172", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.9739, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 1.1305, 1.0820, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.0215, 1.1513, 1.1038, 1.0565, 1.1852, 1.1380, 1.2657, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "61.7%", + "z-score": "11.9", + "p value": "7.8e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.0179, 8.9086, 8.8007, 8.9324, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.0249, 9.9315, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.1627, 10.2743, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.0389, 9.9542, 9.8702, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.3032, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.1933, 11.1173, 11.2164, 11.1410,\n 11.0661, 10.9917, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.6217, 11.7169, 11.8117, 11.7389, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.47, + "accuracy_with_watermark": 0.37, + "f1_without_watermark": 0.39293604945778854, + "f1_with_watermark": 0.24262017885423737 + } + }, + "validation": { + "results": [ + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The new rights are nice enough\nHypothesis: Everyone really likes the newest benefits \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -3.1840, -3.2321, -3.2796, -3.3265, -3.3729, -3.4187, -3.4641,\n -3.5090, -3.5533, -3.5973, -3.6407, -3.4057, -3.4503, -3.4945, -3.5382,\n -3.5814, -3.6242, -3.4000, -3.4438, -3.4871, -3.5301, -3.5726, -3.6148,\n -3.6566, -3.4429, -3.2320, -3.2757, -3.3190, -3.3619, -3.4044, -3.4466,\n -3.4883, -3.5298, -3.5708, -3.6116, -3.6520, -3.6920, -3.7318, -3.7712,\n -3.8104, -3.8492, -3.8877, -3.6950, -3.7342, -3.7730, -3.5839, -3.3968,\n -3.4370, -3.4768, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.3526, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.4879, -3.5256, -3.5631, -3.6004, -3.6374, -3.6742,\n -3.7108, -3.7471, -3.7832, -3.8191, -3.8548, -3.8903, -3.9255, -3.9606,\n -3.9954, -4.0301, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -3.9181, -3.7619, -3.7966, -3.8312, -3.8655, -3.8997, -3.9337,\n -3.9675, -4.0011, -4.0345, -4.0678, -4.1009, -4.1338, -4.1666, -4.1992,\n -4.2316, -4.2639, -4.2960, -4.1477, -4.1800, -4.2122, -4.0656, -4.0980,\n -4.1303, -4.1624, -4.0177, -4.0501, -4.0822, -4.1143, -4.1461, -4.1779,\n -4.2094, -4.0678, -3.9269, -3.9590, -3.9910, -4.0228, -4.0545, -4.0860,\n -4.1174, -4.1487, -4.1798, -4.2108, -4.2416, -4.2723, -4.3029, -4.3333,\n -4.3637, -4.3938, -4.4239, -4.4538, -4.4837, -4.5134, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.4891, 8.6083, 8.5249, 8.4423, 8.5607, 8.6783, 8.7952, 8.9113,\n 9.0267, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 9.1250, 9.0452,\n 8.9660, 9.0786, 9.1905, 9.1119, 9.2232, 9.3338, 9.2559, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.6764,\n 9.6008, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.3999, 10.5001, 10.4281, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.8770, 10.9740, 10.9034, 10.8333,\n 10.9299, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: This site includes a list of all award winners and a searchable database of Government Executive articles.\nHypothesis: The Government Executive articles housed on the website are not able to be searched.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.1375, 1.3101, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.6166, 1.7809, 1.7150, 1.6498, 1.8116,\n 1.7467, 1.6823, 1.8419, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 2.0470, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.3054, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.9013, 1.8490, 1.7970, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.9052, 1.8541, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 2.0873, 2.0369, 1.9868, 2.1131, 2.0631, 2.0134, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.2377, 2.1886, 2.3110, 2.2620, 2.2133,\n 2.1648, 2.2860, 2.4065, 2.3580, 2.3098, 2.4294, 2.5483, 2.6667,\n 2.6182, 2.5700, 2.5220, 2.6393, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "66.2%", + "z-score": "13.4", + "p value": "4.18e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.8860, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 11.9741, 12.0712, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.6918, 12.7847, 12.8771, 12.9691, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510, 13.3759])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: uh i don't know i i have mixed emotions about him uh sometimes i like him but at the same times i love to see somebody beat him\nHypothesis: I like him for the most part, but would still enjoy seeing someone beat him.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.3641, 1.5492,\n 1.7321, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.9149, 2.0785, 2.0107, 1.9437, 2.1049, 2.0381,\n 1.9720, 2.1309, 2.2884, 2.2222, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.7179, 1.6591, 1.6008, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.6081, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.5714, 1.5187, 1.4662, 1.6028, 1.7384, 1.8732,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.2839, 1.2377, 1.3620,\n 1.3159, 1.4393, 1.5621, 1.5159, 1.6378, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.5258, 7.4316, 7.3386, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.1731, 8.0882, 8.2107, 8.1266, 8.0434, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.7831, 8.7033, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.7681, 8.6921, 8.6166,\n 8.7284, 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.2368, 9.3443, 9.2704, 9.3774, 9.3040, 9.4103, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.4375, 9.3665, 9.2961, 9.2261,\n 9.3302, 9.2607, 9.3642, 9.2952, 9.3982, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 9.9008, 9.8333,\n 9.9325, 10.0312, 9.9641, 9.8974, 9.9957, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah i i think my favorite restaurant is always been the one closest you know the closest as long as it's it meets the minimum criteria you know of good food\nHypothesis: My favorite restaurants are always at least a hundred miles away from my house. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.5590, 3.4207, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.4293, 3.6380, 3.8431, 3.7273, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.4743, 3.6662, 3.5642, 3.4641,\n 3.3657, 3.5533, 3.4562, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.1052, 3.0237, 3.1937, 3.1129, 3.0330, 3.2004,\n 3.1211, 3.0429, 2.9656, 3.1300, 3.0533, 2.9775, 3.1394, 3.0641,\n 2.9897, 3.1493, 3.0754, 3.0022, 2.9299, 2.8583, 2.7875, 2.9439,\n 3.0989, 3.0282, 2.9582, 3.1111, 3.2627, 3.4130, 3.3428, 3.2733,\n 3.4217, 3.5689, 3.7148, 3.8596, 4.0032, 4.1457, 4.2870, 4.2164,\n 4.1464, 4.0771, 4.2167, 4.3552, 4.4927, 4.6291, 4.5596, 4.6949,\n 4.8291, 4.9624, 5.0948, 5.2262, 5.3567, 5.2868, 5.2175, 5.1488,\n 5.0806, 5.0130, 4.9460, 4.8795, 4.8135, 4.7481, 4.8763, 4.8113,\n 4.7467, 4.6826, 4.6190, 4.7458, 4.6825, 4.6198, 4.7454, 4.6829,\n 4.6209, 4.7455, 4.8693, 4.8074, 4.7460, 4.8687, 4.9908, 4.9295,\n 4.8687, 4.9897, 4.9292, 4.8690, 4.8093, 4.9292, 4.8698, 4.8107,\n 4.9297, 4.8709, 4.8125, 4.7544, 4.8724, 4.8146, 4.7572, 4.8742,\n 4.9906, 4.9333, 4.8763, 4.8197, 4.9351, 4.8787, 4.8227, 4.9373,\n 4.8815, 4.8260, 4.7709, 4.7161, 4.6616, 4.6074, 4.5535, 4.6667,\n 4.6130, 4.5596, 4.6720, 4.6188, 4.5659, 4.5134, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.1241, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.1813, 7.3233, 7.2232, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.7175, 10.8200, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.4356, 11.3608, 11.4581, 11.5549, 11.6514, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.6217, 11.7169, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.7833, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: i don't know um do you do a lot of camping\nHypothesis: I know exactly.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 1.0510, 0.9608, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.5635, 1.7634, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.6997, 1.8838, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.1918, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.0692, 1.9973, 2.1669, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.0517, 2.2162, 2.3791, 2.3094, 2.2405, 2.1723, 2.3324, 2.2646,\n 2.4228, 2.5796, 2.5117, 2.6667, 2.5991, 2.5322, 2.4660, 2.4004,\n 2.3354, 2.2711, 2.4227, 2.3586, 2.5087, 2.6575, 2.8051, 2.9515,\n 2.8868, 2.8226, 2.9673, 2.9035, 2.8402, 2.7775, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.8753, 3.0151, 2.9537, 2.8928, 2.8324, 2.7724,\n 2.9103, 3.0471, 2.9872, 2.9277, 2.8687, 3.0039, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.2857, 3.2271, 3.1690, 3.1113, 3.0540, 2.9971,\n 2.9406, 3.0706, 3.0143, 3.1433, 3.2715, 3.3989, 3.5256, 3.4689,\n 3.4126, 3.5382, 3.4821, 3.4263, 3.3710, 3.4953, 3.4401, 3.3853,\n 3.3309, 3.4539, 3.5762, 3.5218, 3.4677, 3.4140, 3.3606, 3.4816,\n 3.6019, 3.5485, 3.4954, 3.4427, 3.5619, 3.5093, 3.6277, 3.7455,\n 3.6929, 3.8100, 3.7576, 3.7055, 3.6537, 3.6021, 3.5509, 3.5000,\n 3.6156, 3.5648, 3.6797, 3.7940, 3.9078, 4.0210, 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 7.6667,\n 7.8355, 8.0017, 7.8420, 7.6862, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.8271, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.8667, 9.0068, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.5490, 10.6667, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.2623, 11.3740, 11.2732, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.8704, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.3063, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.7376,\n 12.8359, 12.9337, 13.0311, 12.9430, 13.0400, 13.1364, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.5329, 13.4477, 13.5412,\n 13.4567, 13.3728, 13.4661, 13.3829, 13.4758, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.2282, 14.3166, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.6812, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well that would be a help i wish they would do that here we have got so little landfill space left that we're going to run out before the end of this decade and it's really going to be\nHypothesis: We have plenty of space in the landfill.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.7970, 1.7454, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.7529, 1.7028, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.9906, 2.1145, 2.0656, 2.0170, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 2.0688, 2.0212, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.5649, 8.7093,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.3560, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.0582,\n 8.9586, 9.0845, 8.9861, 8.8889, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.2001, 11.1151, 11.2194, 11.1352, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.0180, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.3935, 12.3163, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.7017, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah i know and i did that all through college and it worked too\nHypothesis: I did that all through college but it never worked \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.7589, 1.9711, 1.8791, 1.7889, 1.7002, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.4163, 2.6098, 2.5205, 2.4327, 2.3462, 2.2611, 2.4495,\n 2.3651, 2.2819, 2.2000, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 2.1678, 2.0913, 2.0158, 1.9413, 1.8677, 2.0426, 1.9695,\n 1.8972, 2.0692, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 2.1213,\n 2.0517, 2.2162, 2.3791, 2.3094, 2.2405, 2.1723, 2.3324, 2.4910,\n 2.6481, 2.5796, 2.5117, 2.6667, 2.5991, 2.5322, 2.4660, 2.4004,\n 2.5527, 2.4874, 2.4227, 2.3586, 2.5087, 2.4449, 2.3817, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.1858, 2.1268, 2.2699, 2.4121, 2.3529, 2.2943, 2.2361, 2.1783,\n 2.3183, 2.2608, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.5019,\n 2.6370, 2.7713, 2.7143, 2.6576, 2.7906, 2.7341, 2.6781, 2.6224,\n 2.5672, 2.6984, 2.6433, 2.5886, 2.5343, 2.6640, 2.6099, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.4653, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.5099, 2.4597, 2.5820, 2.7036, 2.6534, 2.6034, 2.5538,\n 2.6742, 2.7940, 2.9132, 2.8633, 2.9817, 3.0995, 3.0496, 3.0000,\n 2.9507, 2.9016, 3.0182, 2.9692, 2.9205, 2.8721, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.0014, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 8.7927, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.7600, 8.8833, 8.7913, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.6210, 9.5338, 9.4474, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 10.9176, 10.8421, 10.9422, 11.0418, 10.9669,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.2864, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.4300, 11.5261, 11.6217, 11.5489, 11.6441, 11.5718, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.5470, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Calcutta seems to be the only other production center having any pretensions to artistic creativity at all, but ironically you're actually more likely to see the works of Satyajit Ray or Mrinal Sen shown in Europe or North America than in India itself.\nHypothesis: Most of Mrinal Sen's work can be found in European collections.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.6577, 1.8378, 1.7638, 1.6908, 1.8677, 2.0426, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.2405, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 2.0918, 2.2468, 2.1822,\n 2.1182, 2.2711, 2.4227, 2.5731, 2.5087, 2.4449, 2.5934, 2.7406,\n 2.8868, 2.8226, 2.7591, 2.6961, 2.6336, 2.5717, 2.7153, 2.6536,\n 2.5925, 2.7344, 2.8753, 2.8141, 2.7534, 2.8928, 2.8324, 2.7724,\n 2.7129, 2.8505, 2.7913, 2.7325, 2.6742, 2.6163, 2.7520, 2.6943,\n 2.6370, 2.5802, 2.7143, 2.8475, 2.7906, 2.7341, 2.8660, 2.8098,\n 2.7539, 2.6984, 2.8288, 2.7735, 2.7186, 2.6640, 2.6099, 2.5560,\n 2.5026, 2.6309, 2.5776, 2.5247, 2.6519, 2.7783, 2.7253, 2.6726,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.7654, 2.7137, 2.6623, 2.6112,\n 2.5604, 2.6830, 2.6323, 2.5820, 2.5319, 2.6534, 2.7741, 2.7240,\n 2.6742, 2.7940, 2.7443, 2.6949, 2.6458, 2.7644, 2.7154, 2.6667,\n 2.6182, 2.5700, 2.6874, 2.8043, 2.7560, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.0179, 8.9086, 9.0401, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.8987, 9.7986, 9.6995, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.2975, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.7987, 10.9048, 11.0102, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.2789, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.5495, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.5986,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.1063, 13.0316, 13.1219, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: If that investor were willing to pay extra for the security of limited downside, she could buy put options with a strike price of $98, which would lock in her profit on the shares at $18, less whatever the options cost.\nHypothesis: THe strike price could be $8.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.5068, 1.7285, 1.9462, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.5873, 2.7863, 2.6914, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.1546, 2.3462, 2.5352, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.5019, 2.4228, 2.3448, 2.2678, 2.4423, 2.6148, 2.7854, 2.9542,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.2405, 2.1723, 2.3324, 2.4910,\n 2.4228, 2.3552, 2.2884, 2.2222, 2.1567, 2.0918, 2.0276, 2.1822,\n 2.3354, 2.4874, 2.6381, 2.5731, 2.5087, 2.4449, 2.3817, 2.5298,\n 2.6768, 2.6135, 2.5508, 2.4887, 2.4271, 2.3660, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.4717, 2.6131, 2.5532, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.6540, 2.5954, 2.5373, 2.6742, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.3891, 2.3333, 2.4678, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.1656, 2.1128, 2.2436, 2.1909,\n 2.3206, 2.2680, 2.2159, 2.1640, 2.1125, 2.2406, 2.1892, 2.1381,\n 2.2650, 2.2140, 2.1634, 2.1131, 2.0631, 2.0134, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 2.0656, 2.0170, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.8145, 1.7688, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "46.7%", + "z-score": "7.08", + "p value": "7.19e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.4207, 3.2863,\n 3.5165, 3.3853, 3.6098, 3.8297, 3.7009, 3.5753, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.9614, 4.1633, 4.0446, 3.9284, 3.8146, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.1740, 4.0657, 3.9595, 4.1461, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.3788, 4.2784, 4.4537, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.4083, 4.5760, 4.4820, 4.6476,\n 4.8113, 4.9731, 4.8797, 4.7875, 4.6967, 4.8561, 4.7662, 4.9237,\n 5.0795, 4.9904, 4.9023, 4.8154, 4.9691, 4.8830, 5.0350, 5.1855,\n 5.3345, 5.2489, 5.1643, 5.0807, 5.2278, 5.1450, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.1908, 5.3333, 5.2535, 5.3947, 5.5348, 5.6737,\n 5.5942, 5.5155, 5.4377, 5.5750, 5.4977, 5.6338, 5.7689, 5.6921,\n 5.6160, 5.5407, 5.6743, 5.5995, 5.7320, 5.8635, 5.9941, 5.9196,\n 5.8458, 5.7726, 5.9019, 5.8292, 5.9575, 6.0848, 6.0125, 5.9409,\n 5.8698, 5.9960, 5.9254, 6.0506, 6.1750, 6.2985, 6.2282, 6.1584,\n 6.0892, 6.2116, 6.1429, 6.2644, 6.3853, 6.3168, 6.2489, 6.1815,\n 6.3013, 6.2342, 6.3532, 6.4715, 6.5891, 6.5223, 6.4559, 6.3901,\n 6.5067, 6.4413, 6.5571, 6.6724, 6.6072, 6.5424, 6.4781, 6.5924,\n 6.5285, 6.6421, 6.7551, 6.8675, 6.8037, 6.7404, 6.6775, 6.7890,\n 6.7264, 6.8373, 6.9477, 6.8853, 6.8233, 6.7618, 6.8713, 6.8101,\n 6.9190, 7.0273, 7.1352, 7.0741, 7.0133, 6.9530, 7.0601, 7.0000,\n 7.1065, 7.2125, 7.1527, 7.0932, 7.0340, 7.1393, 7.0804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: 3) Dare you rise to the occasion, like Raskolnikov, and reject the petty rules that govern lesser men?\nHypothesis: Would you rise up and defeaat all evil lords in the town?\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "193", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "29.5%", + "z-score": "1.45", + "p value": "0.0729", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 1.2366, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.4027, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.1138, 1.2831, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.2243, 1.3663, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.5187, 1.6554, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.4427, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.4546])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.3086, 9.1890, 9.3231, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.0242, 9.9146, 10.0385, 10.1614, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.0368, 10.9355, 10.8353, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.3791, 11.4857,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.3508, 12.2628, 12.1756,\n 12.2758, 12.1893, 12.1036, 12.0185, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.3754, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.3428, 12.2638, 12.3595,\n 12.2812, 12.3764, 12.2987, 12.2214, 12.3163, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.6323, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The most important directions are simply up and up leads eventually to the cathedral and fortress commanding the hilltop, and down inevitably leads to one of three gates through the wall to the new town.\nHypothesis: Go downwards to one of the gates, all of which will lead you into the cathedral.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.3311, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.6299, 0.5864, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.0068, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.5363, 10.6547, 10.7722, 10.8889, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.3740, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.5157, 11.6242, 11.7320, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.0605, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.9249,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.4390, 13.5329, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 13.9185, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.0248, 14.1149, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.5948, 14.6812, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The bhakti movement of the Tamils brought a new warmth to the hitherto rigid Brahmanic ritual of Hinduism.\nHypothesis: The Tamils' bhakti movement froze the previously warm ritual of Hinduism.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.4%", + "z-score": "11.1", + "p value": "3.68e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.2151, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.1615, 9.2768, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.0631, 10.1695, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.2029, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.3154, 10.4170, 10.5181, 10.4447,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.1919, 11.1218, 11.0521, 11.1475])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: see now in a situation like that the boys are only sixteen years old and they were sexually involved with her and i think like at that particular point she was twenty three you know so she wasn't really that much older than them and being a boy at that age i think that they're very um you know let's face it that's at a point in your life when you you're just starting to realize all the things of life\nHypothesis: Everyone involved was the same age.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.7465, 1.8974,\n 1.8370, 1.7772, 1.9261, 2.0739, 2.2205, 2.1602, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.0682, 2.2111, 2.3529, 2.2943, 2.2361, 2.3764,\n 2.3183, 2.4574, 2.5954, 2.5373, 2.6742, 2.8101, 2.7520, 2.6943,\n 2.6370, 2.5802, 2.7143, 2.6576, 2.6014, 2.7341, 2.8660, 2.8098,\n 2.7539, 2.8845, 2.8288, 2.7735, 2.9029, 3.0315, 2.9761, 2.9212,\n 3.0486, 2.9938, 2.9394, 3.0657, 3.1912, 3.1368, 3.0827, 3.2071,\n 3.1532, 3.0997, 3.2230, 3.1696, 3.1166, 3.2389, 3.1860, 3.1334,\n 3.2547, 3.2023, 3.1502, 3.2705, 3.2186, 3.1669, 3.2863, 3.2348,\n 3.1836, 3.3020, 3.2509, 3.2002, 3.3177, 3.2671, 3.2167, 3.3333,\n 3.2831, 3.2332, 3.3489, 3.2991, 3.2496, 3.3645, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.4072, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 15.1690, 15.2619, 15.1556, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.7680, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.3700, 16.4561, 16.3575, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 16.9265,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.4770, 17.3854, 17.4660, 17.5464, 17.6264, 17.7061,\n 17.7856, 17.8647, 17.9435, 18.0221, 18.1003, 18.0107, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: You and your friends are not welcome here, said Severn.\nHypothesis: Severn said the people were not welcome there.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.9392, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.1480, 11.0554, 10.9637, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.3455, 12.2627, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.6643, 12.5831, 12.5024, 12.5979, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.5897, 13.5131, 13.4371, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: um-hum um-hum yeah well uh i can see you know it's it's it's it's kind of funny because we it seems like we loan money you know we money with strings attached and if the government changes and the country that we loan the money to um i can see why the might have a different attitude towards paying it back it's a lot us that you know we don't really loan money to to countries we loan money to governments and it's the\nHypothesis: We don't loan a lot of money.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.6781, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.6859,\n 1.6337, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.7028, 1.8317, 1.7817,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.8383, 1.9640, 1.9149,\n 1.8660, 1.9906, 1.9419, 1.8935, 1.8453, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.8999, 2.0212, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.4501, 9.3611, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.8867, 10.8051, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 11.0793, 11.0004, 11.1018, 11.2028, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.5329, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: i'm not sure what the overnight low was\nHypothesis: I don't know how cold it got last night.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.6713, 2.5690, 2.4689,\n 2.6765, 2.8808, 2.7811, 2.6833, 2.8830, 2.7863, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.6098, 2.5205, 2.7107, 2.8983, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 3.1789, 3.0924, 3.0071, 2.9231, 3.0984,\n 3.0151, 2.9329, 2.8518, 3.0237, 2.9433, 2.8638, 3.0330, 2.9542,\n 2.8764, 2.7995, 2.9656, 3.1300, 3.2928, 3.2157, 3.1394, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.2332, 3.1597, 3.0870, 3.2426, 3.1704,\n 3.0989, 3.2525, 3.1814, 3.1111, 3.2627, 3.4130, 3.3428, 3.2733,\n 3.4217, 3.3526, 3.4995, 3.4308, 3.3627, 3.2953, 3.2285, 3.1623,\n 3.0967, 3.2408, 3.3838, 3.3182, 3.4599, 3.3947, 3.5350, 3.4701,\n 3.6091, 3.5446, 3.4806, 3.4171, 3.5544, 3.4913, 3.4286, 3.3665,\n 3.5022, 3.4403, 3.3789, 3.5132, 3.4521, 3.3915, 3.3314, 3.4641,\n 3.5960, 3.7270, 3.6667, 3.6068, 3.7366, 3.6770, 3.6178, 3.7463,\n 3.8741, 3.8150, 3.7563, 3.8829, 3.8244, 3.7664, 3.8919, 3.8341,\n 3.7766, 3.9010, 4.0247, 3.9673, 3.9104, 4.0330, 3.9762, 4.0980,\n 4.0415, 3.9853, 3.9294, 3.8740, 3.8189, 3.9392, 4.0589, 4.0038,\n 4.1226, 4.0678, 4.1859, 4.1312, 4.2485, 4.1940, 4.1399, 4.0860,\n 4.2023, 4.1487, 4.0953, 4.0423, 4.1576, 4.1048, 4.0522, 4.1667,\n 4.1143, 4.0622, 4.0105, 4.1239, 4.2369, 4.3492, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.86", + "p value": "1.97e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.0226, 7.1857, 7.0557, 6.9282,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.1470, 6.0596, 6.2008, 6.1143,\n 6.2541, 6.3928, 6.5303, 6.4444, 6.5807, 6.7159, 6.8500, 6.9830,\n 7.1149, 7.0296, 6.9451, 6.8615, 6.9923, 7.1220, 7.2508, 7.1678,\n 7.2956, 7.2134, 7.3402, 7.2587, 7.3845, 7.3037, 7.2236, 7.1443,\n 7.2691, 7.1904, 7.1125, 7.0353, 7.1590, 7.0823, 7.0063, 7.1291,\n 7.0537, 7.1755, 7.1007, 7.2217, 7.3419, 7.2675, 7.1938, 7.1207,\n 7.0481, 6.9762, 6.9048, 7.0238, 7.1421, 7.0711, 7.0006, 7.1181,\n 7.0481, 6.9786, 7.0952, 7.0262, 6.9577, 6.8897, 6.8222, 6.7552,\n 6.8707, 6.8041, 6.7380, 6.6724, 6.6072, 6.7217, 6.6568, 6.7706,\n 6.8838, 6.9964, 6.9317, 7.0436, 7.1549, 7.0905, 7.2012, 7.3113,\n 7.4208, 7.5297, 7.6381, 7.5738, 7.6816, 7.6177, 7.7249, 7.8316,\n 7.9377, 7.8740, 7.9796, 7.9162, 8.0212, 7.9582, 8.0627, 8.0000,\n 7.9377, 7.8758, 7.9796, 7.9179, 7.8567])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: so i have to find a way to supplement that\nHypothesis: I need a way to add something extra.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.0034, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 5.9874, 5.8835, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.5169, 6.4283, 6.3408,\n 6.4795, 6.3928, 6.5303, 6.4444, 6.5807, 6.7159, 6.8500, 6.9830,\n 6.8977, 6.8133, 6.7298, 6.6471, 6.7788, 6.9094, 6.8274, 6.7462,\n 6.6658, 6.7952, 6.9237, 7.0513, 6.9714, 6.8922, 6.8138, 6.7361,\n 6.6591, 6.7854, 6.7090, 6.6332, 6.5582, 6.6833, 6.6088, 6.7330,\n 6.6591, 6.7823, 6.7089, 6.6361, 6.5639, 6.6861, 6.8075, 6.9282,\n 6.8563, 6.7850, 6.9048, 7.0238, 6.9529, 6.8825, 7.0006, 7.1181,\n 7.2348, 7.3508, 7.4662, 7.5809, 7.5106, 7.4409, 7.3717, 7.4855,\n 7.5988, 7.7114, 7.8233, 7.7544, 7.6859, 7.7971, 7.9078, 8.0178,\n 7.9497, 7.8820, 7.8147, 7.7480, 7.6816, 7.7908, 7.7249, 7.6594,\n 7.5944, 7.7028, 7.6381, 7.7460, 7.6816, 7.7889, 7.7249, 7.6613,\n 7.5981, 7.7047, 7.6418, 7.7478, 7.6853, 7.7907, 7.8956, 7.8333,\n 7.7715, 7.7099, 7.8142, 7.7530, 7.8567, 7.7958, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.6747, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.0235, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.3608, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.5033, 11.4300, 11.5261, 11.6217, 11.7169, 11.8117, 11.9060, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: the hologram makes up all these things and uh i mean sometimes sometimes it's funny sometimes it's not but uh you know it's something to pass the time until we do and then and then we watch football\nHypothesis: Sometimes it is amusing to see what the hologram creates.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.4689,\n -2.2177, -2.2743, -2.3301, -2.0870, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.7242, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.1793, 1.4757, 1.7628, 1.6330, 1.5076, 1.3862, 1.2687, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.0954, 0.9918, 0.8909, 0.7924, 0.6963, 0.6025,\n 0.5108, 0.4211, 0.3333, 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.7857,\n 1.0120, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456,\n 0.8660, 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164, 0.4491,\n 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462, 0.1836, 0.1217,\n 0.3026, 0.4815, 0.4191, 0.5955, 0.7701, 0.9428, 0.8793, 0.8165, 0.7543,\n 0.6928, 0.8617, 0.8003, 0.7395, 0.6794, 0.6198, 0.5608, 0.5023, 0.4444,\n 0.3871, 0.5505, 0.7124, 0.6547, 0.5974, 0.5407, 0.4845, 0.6433, 0.5871,\n 0.5315, 0.4763, 0.4216, 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029,\n 0.2562, 0.4082, 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.6983, 0.8447,\n 0.9901, 1.1345, 1.0812, 1.2243, 1.1711, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.6781, 1.8157, 1.7619, 1.8983, 2.0338, 1.9799, 2.1143, 2.0605, 2.0071,\n 2.1401, 2.0868, 2.2188, 2.1656, 2.2966, 2.4267, 2.3735, 2.5026, 2.6309,\n 2.5776, 2.7050, 2.8316, 2.7783, 2.9040, 3.0290, 3.1532, 3.2768, 3.2230,\n 3.3457, 3.4677, 3.5890, 3.5351, 3.6556, 3.7755, 3.8947, 4.0132, 4.1312,\n 4.2485, 4.1940, 4.3106, 4.4265, 4.3721, 4.4873, 4.6020, 4.7161, 4.6616,\n 4.6074, 4.7206, 4.8333, 4.9455, 5.0571, 5.0027, 5.1137, 5.2241, 5.3340,\n 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: 5 The share of gross national saving used to replace depreciated capital has increased over the past 40 years.\nHypothesis: Gross national saving was highest this year.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "45.2%", + "z-score": "6.59", + "p value": "2.21e-11", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.3094, 2.5627, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.0123, 2.8943, 3.1160, 3.0000,\n 2.8868, 2.7761, 2.9913, 2.8823, 3.0929, 2.9856, 2.8804, 3.0861,\n 3.2883, 3.1840, 3.0817, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.4562, 3.3607, 3.2667, 3.4503, 3.6315, 3.5382,\n 3.4463, 3.3558, 3.5333, 3.7087, 3.8819, 3.7916, 3.7025, 3.6148,\n 3.7849, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.5282, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.6116, 3.7717, 3.9302, 4.0872, 4.0069,\n 4.1621, 4.3158, 4.2359, 4.1569, 4.0788, 4.2303, 4.1528, 4.3027,\n 4.2258, 4.1497, 4.2977, 4.4444, 4.3687, 4.2938, 4.2196, 4.3644,\n 4.5079, 4.6503, 4.5762, 4.7173, 4.8572, 4.9960, 5.1338, 5.0596,\n 5.1962, 5.1225, 5.0496, 4.9774, 5.1123, 5.2463, 5.1744, 5.1031,\n 5.0325, 5.1650, 5.2965, 5.4272, 5.3567, 5.4863, 5.4163, 5.3468,\n 5.2779, 5.2096, 5.3378, 5.2699, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.5138, 5.4471, 5.3810, 5.3153, 5.4393, 5.5626, 5.6851, 5.6195,\n 5.7411, 5.8621, 5.9822, 6.1017, 6.0362, 5.9711, 6.0897, 6.0249,\n 6.1427, 6.0784, 6.0145, 6.1314, 6.2476, 6.1839, 6.1207, 6.0579,\n 6.1732, 6.2879, 6.4019, 6.3392, 6.4526, 6.3902, 6.3283, 6.2668,\n 6.2057, 6.3180, 6.2572, 6.3689, 6.3084, 6.2482, 6.3592, 6.4695,\n 6.4096, 6.3500, 6.2908, 6.4004, 6.5094, 6.6179, 6.5588, 6.6667,\n 6.6078, 6.5493, 6.6565, 6.5983, 6.5404, 6.6469, 6.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.8898, 1.7233, 2.0466, 1.8856, 1.7321, 1.5852, 1.8889,\n 1.7457, 1.6082, 1.8974, 2.1776, 2.4495, 2.7136, 2.9704, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.0509, 3.2863, 3.5165, 3.7417, 3.6098, 3.8297, 3.7009,\n 3.9158, 4.1265, 4.3333, 4.2064, 4.0825, 3.9614, 3.8431, 3.7273, 3.6141,\n 3.5032, 3.3947, 3.5942, 3.7905, 3.9837, 4.1740, 4.0657, 3.9595, 4.1461,\n 4.0415, 3.9386, 3.8376, 4.0205, 3.9208, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.3333, 4.2385, 4.1451, 4.0531, 3.9624, 3.8730, 3.7849,\n 3.9530, 4.1192, 4.2836, 4.4462, 4.3580, 4.2710, 4.4313, 4.3451, 4.2601,\n 4.1761, 4.3339, 4.2507, 4.4066, 4.3241, 4.4783, 4.6311, 4.7823, 4.7001,\n 4.6188, 4.5384, 4.4590, 4.3804, 4.3027, 4.2258, 4.1497, 4.2977, 4.4444,\n 4.5899, 4.7341, 4.6580, 4.5826, 4.7252, 4.6503, 4.5762, 4.5029, 4.6437,\n 4.5708, 4.7104, 4.6380, 4.7763, 4.9135, 5.0496, 4.9774, 4.9058, 4.8348,\n 4.7645, 4.6949, 4.6258, 4.5573, 4.4895, 4.6232, 4.7559, 4.8878, 5.0187,\n 4.9507, 4.8833, 5.0130, 4.9460, 4.8795, 4.8135, 4.9419, 4.8763, 5.0037,\n 4.9385, 5.0649, 5.1905, 5.3153, 5.2501, 5.1854, 5.1213, 5.0576, 4.9943,\n 4.9316, 4.8693, 4.8074, 4.9303, 5.0525, 5.1739, 5.2947, 5.2327, 5.1711,\n 5.2909, 5.2297, 5.3487, 5.4670, 5.5846, 5.5234, 5.4626, 5.5794, 5.6955,\n 5.8110, 5.7503, 5.8650, 5.8046, 5.9186, 6.0321, 6.1449, 6.0846, 6.0246,\n 5.9651, 5.9059, 5.8470, 5.7885, 5.7304, 5.6727, 5.7841, 5.8951, 6.0054,\n 6.1153, 6.0575, 6.0000, 6.1091, 6.0519, 6.1604, 6.1034, 6.2113, 6.3187,\n 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: So far, however, the number of mail pieces lost to alternative bill-paying methods is too small to have any material impact on First-Class volume.\nHypothesis: The amount of lost mail is huge and really impacts mail volume\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.9812, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 1.0371, 0.9867, 1.1239,\n 1.2603, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 1.1794, 1.1305, 1.0820, 1.0338, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.2982, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.8490, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.7006, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.3901, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.2733, 8.3976, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 8.8958, 8.8108, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.3544, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.2164, 11.3150,\n 11.2396, 11.3378, 11.2630, 11.1886, 11.2864, 11.2126, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.7169, 11.8117, 11.7389, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: you want to punch the button and go\nHypothesis: You don't want to push the button lightly, but rather punch it hard.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.8665, -1.9107, -1.7488, -1.7931, -1.6330,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.3264, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.2744, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "67.0%", + "z-score": "13.6", + "p value": "1.62e-42", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.3901, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.5909, 10.4903, 10.6061, 10.5067, 10.4083, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 11.8571, 11.9594, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.1184, 12.2178, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.1617, 13.0821, 13.0030, 13.0956, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.0460, 13.1376, 13.2288, 13.3196, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Buffet and a\u00a0 la carte available.\nHypothesis: It has a buffet.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "88", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "33.0%", + "z-score": "1.72", + "p value": "0.0424", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 2.1997, 2.1094, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.9052,\n 1.8245, 2.0135, 1.9333, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.8677, 1.7951, 1.7233])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 8.9815,\n 9.1225, 9.2620, 9.1333, 9.2717, 9.4087, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.2187, 11.1111, 11.2259, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.8982, 11.7932, 11.9029, 11.7992, 11.9083, 12.0167,\n 11.9144, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.3586, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 13.0732, 12.9771, 13.0771, 12.9820, 13.0815, 13.1806, 13.0866,\n 13.1852, 13.2834, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.5827, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.0619, 14.1543, 14.0660, 14.1582, 14.2499, 14.1625, 14.2539,\n 14.3449, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.0689, 15.1553, 15.0726, 15.1587, 15.2446, 15.1625, 15.2481, 15.3333,\n 15.2520, 15.3370, 15.4217, 15.5060, 15.5900, 15.6736, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Blue says Blumenthal claimed Clinton had told him that Lewinsky had made unwanted sexual advances.\nHypothesis: Clinton said that Monica Lewinsky made unwanted sexual advances during her time as a journalist in the White House. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.1143, 0.0569, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.7293, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.5547, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.2909, 0.4145, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "61.5%", + "z-score": "11.8", + "p value": "2.38e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.1684, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.0358, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.8000, 7.6823, 7.8296, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.1343, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.9813, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.8025, 10.9107, 10.8224, 10.9301, 10.8426, 10.7559, 10.6700, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.6306, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.5123,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.4533, 11.5489, 11.6441, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: We also have found that leading organizations strive to ensure that their core processes efficiently and effectively support mission-related outcomes.\nHypothesis: Leading organizations want to be sure their employees are safe.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "200", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.5%", + "z-score": "-0.49", + "p value": "0.688", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -0.8076, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.2334, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.7539, -0.7937, -0.6667,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139, -0.4899])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.8780, 8.7757, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.1925, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.4501, 9.3611, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.4474, 9.5620, 9.6758, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.8170, 10.7415, 10.8421, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.9917, 10.9178, 11.0165, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.4525, 11.5470, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: oh uh-huh well no they wouldn't would they no\nHypothesis: No, they wouldn't go there.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "10.6%", + "z-score": "-4.71", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.9329, -2.9785, -3.0237, -3.0685, -3.1129, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -3.1730, -3.2157, -3.2579, -3.2998,\n -3.1069, -3.1493, -3.1914, -3.2332, -3.2746, -3.3156, -3.3564, -3.3968,\n -3.4370, -3.4768, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.5689, -3.6072, -3.6452, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.6216, -3.6590, -3.6961, -3.7330, -3.7697, -3.8061, -3.6374, -3.6742,\n -3.7108, -3.7471, -3.7832, -3.8191, -3.8548, -3.8903, -3.9255, -3.9606,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -3.9524, -3.9865, -4.0204, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.2200, -4.2527, -4.1009, -4.1338, -4.1666, -4.1992,\n -4.2316, -4.2639, -4.2960, -4.3280, -4.3598, -4.3915, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.3710, -4.4023, -4.4334, -4.4644, -4.4953, -4.5260,\n -4.3830, -4.4140, -4.4448, -4.4754, -4.5060, -4.5364, -4.5666, -4.5968,\n -4.6268, -4.6567, -4.5175, -4.5476, -4.5776, -4.6074, -4.6371, -4.6667,\n -4.6961, -4.7255, -4.7547, -4.7838, -4.6482, -4.6775, -4.7066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 4.9135, 5.1698, 5.4174, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.0631, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.3827, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 10.9220, 11.0194, 11.1164, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Sorry but that's how it is.\nHypothesis: This is how things are and there are no apologies about it.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.5275, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.4403, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.8475, 1.7809, 1.9437, 2.1049, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 2.0548, 2.2074, 2.3586, 2.2952, 2.2323, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 2.0739, 2.0140, 2.1602, 2.3054, 2.4495,\n 2.5925, 2.5318, 2.4717, 2.4121, 2.5532, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.6540, 2.5954, 2.5373, 2.4797, 2.6163, 2.5589, 2.6943,\n 2.8288, 2.9625, 2.9048, 2.8475, 2.9798, 3.1113, 3.2419, 3.1844,\n 3.1273, 3.0706, 3.0143, 2.9584, 2.9029, 2.8478, 2.7930, 2.7386,\n 2.6846, 2.6309, 2.5776, 2.5247, 2.4721, 2.5990, 2.7253, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.5319, 2.4822, 2.6034, 2.7240,\n 2.8440, 2.7940, 2.7443, 2.6949, 2.6458, 2.5969, 2.5483, 2.5000,\n 2.4520, 2.4042, 2.5220, 2.4744, 2.4269, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.1241, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.3277, 12.4286, 12.5289, 12.6287, 12.7279, 12.8267, 12.9249,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.7195, 14.8074, 14.8950, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.1587, 15.2446, 15.3301, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Not quite as large is the Papal Crose commemorating Pope John Paul II's visit in 1979, when more than one million people gathered to celebrate mass.\nHypothesis: Pope John Paul II also visited in 1983.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.3919, -2.4495, -2.5062, -2.2418, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.3102, -2.3570,\n -2.4035, -2.2162, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.7341, -2.7721, -2.8098,\n -2.6605, -2.5123, -2.5506, -2.5886, -2.4421, -2.4803, -2.5183, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.5020, -2.5386, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.4461, -2.3110, -2.3473, -2.2133,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "166", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "62.0%", + "z-score": "11", + "p value": "1.47e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.9601, 6.8458, 6.7333, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 8.8227, 8.9469,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.5400, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.5621, 10.4769, 10.3923,\n 10.3085, 10.2253, 10.3333, 10.2509, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.5893,\n 10.6929, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The tourist industry continued to expand, and though it became one of the top two income earners in Spain, a realization that unrestricted mass tourism was leading to damaging long-term consequences also began to grow.\nHypothesis: Tourism is not very big in Spain.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.8%", + "z-score": "2.87", + "p value": "0.00204", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.7395, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 1.1896, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.6008, 1.5430, 1.6906, 1.8371,\n 1.7792, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.6398, 1.7823,\n 1.9237, 2.0642, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 2.0068, 2.1429, 2.0881, 2.2230, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.0868, 2.2188, 2.3500, 2.4803, 2.6099, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.3443, 2.2923, 2.4198, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.3603, 2.4822, 2.6034, 2.5538,\n 2.5044, 2.4553, 2.4065, 2.3580, 2.3098, 2.4294, 2.5483, 2.5000,\n 2.6182, 2.5700, 2.6874, 2.8043, 2.7560, 2.8721])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.5320,\n 6.3317, 6.1389, 6.3434, 6.1584, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.0219, 6.8718, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.3641, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.2348, 10.3496, 10.2554, 10.1621, 10.0698, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.8313, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.6667,\n 13.7559, 13.8447, 13.7679, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: She seemed so different \"\nHypothesis: She had changed a lot since the last time we'd seen her.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.9456, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.6222, 1.7767, 1.9298, 1.8682, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.6081, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.4662, 1.4142, 1.5505, 1.4985,\n 1.6337, 1.7679, 1.9013, 2.0339, 1.9813, 1.9291, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 2.1125, 2.2406, 2.1892, 2.3163,\n 2.2650, 2.2140, 2.1634, 2.1131, 2.0631, 2.1884, 2.1385, 2.0889,\n 2.0396, 2.1637, 2.1145, 2.0656, 2.0170, 1.9686, 1.9206, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.6042, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.7543, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.6678, 8.8007, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.4138, 8.3138, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.0504, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.1429, 10.0611, 9.9800, 10.0881, 10.0076, 9.9278,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.7175, 10.8200, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.4356, 11.5329, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.6217, 11.7169, 11.6441, 11.7389, 11.6667,\n 11.5948, 11.5235, 11.6179, 11.5470, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: i'm not opposed to it but when its when the time is right it will probably just kind of happen you know\nHypothesis: I cannot wait for it to happen.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.5818, 1.5303, 1.6641, 1.7970, 1.7454, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.7529, 1.7028, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.8660, 1.9906, 2.1145, 2.0656, 2.0170, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 2.0688, 2.0212, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.9795, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.2147, 9.3422, 9.4685, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.3361, 13.2499, 13.3447, 13.2593, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.4661, 13.5589, 13.6514, 13.7434, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.5659, 14.4850, 14.5726, 14.6599, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The census of 1931 served as an alarm signal for the Malay national consciousness.\nHypothesis: The 1931 Malay census was an alarm bell.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.1822, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -1.7963, -1.8594, -1.6013, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.8500, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.7566, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.6057, -1.6444, -1.5104, -1.5492, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "191", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "74.9%", + "z-score": "15.9", + "p value": "2.43e-57", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.5258,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 10.0698,\n 9.9433, 9.8187, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.8982, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.2314, 12.3377, 12.4434, 12.5485, 12.6529, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.9639, 13.0655, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.3674, 13.2698, 13.1730, 13.0771, 13.1765, 13.2753, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.4780, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.1458, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.4248, 14.3352, 14.4267, 14.5178, 14.6084, 14.6986, 14.7885,\n 14.7002, 14.7898, 14.8789, 14.9677, 15.0560, 14.9689, 15.0570, 15.1448,\n 15.2321, 15.3191, 15.4057, 15.3198, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.5783, 15.6633, 15.7481, 15.8325, 15.9165, 15.8327, 15.9165])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Mortifyingly enough, it is all the difficulty, the laziness, the pathetic formlessness in youth, the round peg in the square hole, the whatever do you want?\nHypothesis: Many youth are lazy.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.3%", + "z-score": "0.739", + "p value": "0.23", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.4281, -1.4907, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.4967, -1.3101, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.2397, -0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.3522, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.4949, 0.6170, 0.7385])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 6.8354,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.5556, 7.6867, 7.8168, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.2554, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.9846, 9.9067, 10.0131, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.2129, 11.1415, 11.2376, 11.3333,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Conversely, an increase in government saving adds to the supply of resources available for investment and may put downward pressure on interest rates.\nHypothesis: Interest rates should increase to increase saving.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.89", + "p value": "0.97", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.5483, -1.3624, -1.4142,\n -1.2310, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.6246, -1.6667, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.6843, -1.7218, -1.7592, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.3035, 6.2075, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.1685, 6.3070, 6.4444, 6.3595, 6.4957, 6.6308, 6.7648,\n 6.8977, 6.8133, 6.9451, 7.0759, 6.9923, 7.1220, 7.0391, 7.1678,\n 7.0857, 7.2134, 7.3402, 7.2587, 7.3845, 7.5094, 7.6335, 7.7567,\n 7.6758, 7.5955, 7.7178, 7.6383, 7.7597, 7.8803, 7.8014, 7.7232,\n 7.8429, 7.9619, 7.8842, 7.8072, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.9729, 7.8988, 8.0139, 7.9403, 7.8673,\n 7.9816, 7.9091, 8.0227, 7.9507, 8.0636, 8.1758, 8.2874, 8.3984,\n 8.5088, 8.4371, 8.3660, 8.4757, 8.5848, 8.6933, 8.6226, 8.7305,\n 8.8379, 8.9447, 9.0510, 9.1567, 9.2619, 9.3665, 9.4707, 9.4002,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.7415, 9.8431, 9.9442, 9.8746,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.0371, 9.9687, 10.0679, 10.0000,\n 9.9325, 10.0312, 9.9641, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Britain's best-selling tabloid, the Sun , announced as a front-page world exclusive Friday that Texan model Jerry Hall has started divorce proceedings against aging rock star Mick Jagger at the High Court in London.\nHypothesis: There is a British publication called the Sun.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 5.9797, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 7.9286, 8.0829,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.5368, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.6061, 10.7211, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.3791, 11.4857,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.7612, 13.8522, 13.9427, 14.0329, 14.1227, 14.2121, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: He was of two minds, one reveled in the peace of this village.\nHypothesis: He loved how peaceful the village was.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.3660, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.7013, -2.7358, -2.7701, -2.8043, -2.8383, -2.8721, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.3131, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 7.6667,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 9.8020, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 10.9560, 11.0782,\n 11.1994, 11.0762, 11.1967, 11.3163, 11.4349, 11.5525, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.6632, 11.5470, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.8653, 12.9704, 13.0748, 13.1785, 13.0707,\n 12.9641, 13.0677, 13.1707, 13.2730, 13.3747, 13.4758, 13.5764, 13.4722,\n 13.3690, 13.4694, 13.5693, 13.6685, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 13.9565, 14.0530, 14.1489, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.4301, 14.3333, 14.4272, 14.5206, 14.6135, 14.5181, 14.4234,\n 14.3295, 14.4225, 14.5150, 14.6071, 14.6987, 14.7899, 14.8807, 14.7885,\n 14.8790, 14.9691, 15.0588, 15.1481, 15.2369, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.6754, 15.7619, 15.6720, 15.5828, 15.6692, 15.7553, 15.8411,\n 15.9264, 16.0115, 16.0961, 16.0083, 15.9211, 16.0057, 16.0900, 16.1739,\n 16.0877, 16.1713, 16.2547, 16.3377, 16.4205, 16.5028, 16.4178, 16.5000,\n 16.5819, 16.4976, 16.5793, 16.6607, 16.7417, 16.8225, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The vineyards hug the gentle slopes between the Vosges and the Rhine Valley along a single narrow 120-km (75-mile) strip that stretches from Marlenheim, just west of Strasbourg, down to Thann, outside Mulhouse.\nHypothesis: The slopes between the Vosges and Rhine Valley are the only place appropriate for vineyards.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "174", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "19.0%", + "z-score": "-1.84", + "p value": "0.967", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.1884, -2.2323, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -2.1172, -1.9545, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.6081, -1.6521, -1.4963, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.4540, 8.3425, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.3409, 10.2514, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.5966, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 11.9197, 12.0180, 11.9377,\n 12.0355, 11.9558, 12.0532, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.6757, 12.5986,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.0157, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: We did not study the reasons for these deviations specifically, but they likely result from the context in which federal CIOs operate.\nHypothesis: These deviations mostly involve failure to apply software updates in a timely manner.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.7%", + "z-score": "-1.07", + "p value": "0.857", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.1399, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.0226, 7.1857, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.9086, 8.8007, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.7482, 8.8780, 9.0067, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 9.9085,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.4738, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 10.9497, 10.8631, 10.9697,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.8210, 11.7405, 11.8393, 11.9377,\n 11.8579, 11.7787, 11.8766, 11.9741, 12.0712, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.0302, 11.9534, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.3888, 12.4818, 12.4074, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Rouen is the ancient center of Normandy's thriving textile industry, and the place of Joan of Arc's martyrdom ' a national symbol of resistance to tyranny.\nHypothesis: Joan of Arc sacrificed her life at Rouen, which became an enduring symbol of opposition to tyranny.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.2309, -0.0574, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.6222, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, 0.0000,\n -0.0455, -0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.2100, -0.0838, -0.1253, 0.0000,\n 0.1247, 0.2487, 0.2067, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "61.4%", + "z-score": "11.8", + "p value": "1.83e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.0370, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.0631, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.4858, 10.4097, 10.5123,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.3154, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.3999, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 11.0450, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Table 2: Examples of BLM's, FHWA's, IRS's, and VBA's Customer Satisfaction Expectations for Senior Executive Performance\nHypothesis: Senior Executive's have been studies on various aspects to reach the expectations.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 1.0719, 1.0070, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.3856, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.8987, 0.8485, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 1.0105, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.2804, 1.4087, 1.3607, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.4713, 1.4241, 1.5492, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.4397, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.7600, 8.6679, 8.7913, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.2240, 9.3380, 9.2554, 9.1735, 9.2867, 9.3993, 9.3181, 9.2376,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.6148, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.3827, 10.3065, 10.2310, 10.3341,\n 10.2591, 10.1846, 10.2872, 10.3893, 10.3154, 10.4170, 10.3435, 10.2706,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.6990, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.7090, 10.8064, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In Hong Kong you can have a plate, or even a whole dinner service, hand-painted to your own design.\nHypothesis: It's impossible to have a plate hand-painted to your own design in Hong Kong.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -0.7783, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.5695, -0.6149, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.3698, -0.2304, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.2156, 2.5281, 2.3570, 2.1939, 2.4910, 2.3333,\n 2.1822, 2.0370, 1.8974, 1.7628, 1.6330, 1.5076, 1.3862, 1.2687, 1.5396,\n 1.8034, 2.0605, 1.9415, 2.1909, 2.4345, 2.3163, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.0000, 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142,\n 1.3234, 1.2344, 1.4530, 1.6678, 1.8791, 1.7889, 1.9959, 2.1997, 2.4004,\n 2.5981, 2.7928, 2.9848, 3.1741, 3.3607, 3.2667, 3.4503, 3.6315, 3.8103,\n 3.9869, 3.8927, 4.0667, 3.9736, 4.1451, 4.0531, 4.2222, 4.1312, 4.2981,\n 4.4630, 4.3727, 4.2836, 4.4462, 4.3580, 4.5186, 4.6775, 4.8347, 4.9904,\n 5.1444, 5.0562, 5.2086, 5.3594, 5.2719, 5.4212, 5.3345, 5.2489, 5.1643,\n 5.3116, 5.2278, 5.3736, 5.2906, 5.4349, 5.5780, 5.7199, 5.6373, 5.5556,\n 5.6959, 5.6149, 5.7540, 5.6737, 5.8114, 5.9481, 6.0837, 6.2183, 6.1382,\n 6.2716, 6.4040, 6.5354, 6.4558, 6.3770, 6.2990, 6.4291, 6.3517, 6.4807,\n 6.6089, 6.7361, 6.6591, 6.7854, 6.7090, 6.8343, 6.7585, 6.6833, 6.8076,\n 6.7330, 6.8564, 6.7823, 6.7089, 6.8313, 6.9529, 6.8799, 7.0007, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.5204, 7.4482, 7.3765, 7.4927, 7.6082,\n 7.7230, 7.6517, 7.5809, 7.5106, 7.4409, 7.5548, 7.6681, 7.7808, 7.7114,\n 7.6424, 7.7544, 7.8657, 7.9764, 7.9078, 8.0178, 8.1273, 8.2362, 8.3446,\n 8.4523, 8.3840, 8.4911, 8.4232, 8.5298, 8.4623, 8.5683, 8.5012, 8.6066,\n 8.5399, 8.4736, 8.4078, 8.3423, 8.4471, 8.3820, 8.3173, 8.4215, 8.3572,\n 8.2933, 8.3969, 8.5000, 8.6026, 8.5390, 8.6411, 8.7427, 8.8439, 8.7805,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: There are a number of expensive jewelry and other duty-free shops, all with goods priced in US dollars (duty-free goods must always be paid for in foreign currency).\nHypothesis: Jewelry and duty-free shops are an interesting place to buy goods.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "11.0%", + "z-score": "-2.77", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.3333,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.3409, 4.6268,\n 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998, 3.5796, 3.8497, 4.1111,\n 4.3644, 4.6101, 4.4272, 4.2515, 4.0825, 3.9196, 4.1586, 4.0012, 3.8490,\n 4.0814, 4.3083, 4.5301, 4.7469, 4.9592, 5.1671, 5.3708, 5.5705, 5.4222,\n 5.2778, 5.1371, 5.0000, 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140,\n 5.9944, 6.1721, 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648,\n 6.9282, 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460, 7.8905,\n 7.7784, 7.6681, 7.5593, 7.4521, 7.3464, 7.2421, 7.1393, 7.2827, 7.1813,\n 7.0812, 6.9824, 6.8849, 7.0268, 6.9305, 7.0711, 6.9759, 7.1152, 7.0211,\n 6.9282, 6.8364, 6.7456, 6.6559, 6.5672, 6.4795, 6.6171, 6.5303, 6.4444,\n 6.3595, 6.2755, 6.4116, 6.3283, 6.4632, 6.3807, 6.5144, 6.4327, 6.3517,\n 6.2716, 6.1923, 6.1137, 6.0359, 5.9589, 6.0908, 6.0143, 5.9386, 5.8635,\n 5.7892, 5.9196, 6.0491, 6.1777, 6.3054, 6.2312, 6.1577, 6.0848, 6.0125,\n 6.1389, 6.0671, 5.9960, 6.1213, 6.0506, 6.1750, 6.1047, 6.0351, 5.9660,\n 5.8974, 5.8294, 5.9524, 6.0746, 6.1961, 6.1283, 6.0609, 5.9941, 5.9279,\n 6.0481, 5.9822, 6.1017, 6.0362, 6.1548, 6.0897, 6.0249, 5.9607, 5.8969,\n 5.8336, 5.9510, 6.0678, 6.1839, 6.1207, 6.0579, 5.9956, 5.9336, 6.0487,\n 6.1632, 6.2770, 6.3902, 6.3283, 6.2668, 6.2057, 6.1449, 6.2572, 6.3689,\n 6.4800, 6.5906, 6.7006, 6.6398, 6.7492, 6.8580, 6.7974, 6.9056, 7.0133,\n 7.1205, 7.2272, 7.1667, 7.2728, 7.3783, 7.3180, 7.4231, 7.5276, 7.4676,\n 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: What's truly striking, though, is that Jobs has never really let this idea go.\nHypothesis: Jobs never held onto an idea for long.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.8337, 1.0070, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.8370, 1.9863, 2.1344, 2.0739, 2.0140, 1.9545, 2.1005, 2.0412,\n 1.9825, 2.1268, 2.0682, 2.2111, 2.1527, 2.0948, 2.0373, 2.1783,\n 2.1210, 2.2608, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.5019,\n 2.4453, 2.3891, 2.5238, 2.4678, 2.6014, 2.5456, 2.4902, 2.4351,\n 2.5672, 2.5123, 2.6433, 2.7735, 2.9029, 2.8478, 2.7930, 2.9212,\n 2.8666, 2.9938, 2.9394, 2.8853, 3.0114, 2.9575, 3.0827, 3.0290,\n 2.9756, 2.9225, 3.0464, 2.9935, 3.1166, 3.2389, 3.3606, 3.3075,\n 3.2547, 3.2023, 3.3228, 3.2705, 3.2186, 3.3381, 3.2863, 3.4050,\n 3.3534, 3.3020, 3.2509, 3.3686, 3.3177, 3.4346, 3.5509, 3.6667,\n 3.6156, 3.5648, 3.5143, 3.6291, 3.5787, 3.5286, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140, 4.9652, 4.7556, 5.0000,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.1257, 4.9507, 4.7819, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.4772, 5.6805, 5.8797, 6.0751, 5.9186, 5.7664,\n 5.9588, 5.8108, 5.6667, 5.5261, 5.7155, 5.5783, 5.7646, 5.6307, 5.4997,\n 5.3716, 5.2463, 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 5.1326, 5.3100,\n 5.4848, 5.6573, 5.8275, 5.7133, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.4222, 5.3245,\n 5.2281, 5.3867, 5.2915, 5.1977, 5.3541, 5.2614, 5.1698, 5.0795, 5.2338,\n 5.1444, 5.2970, 5.2086, 5.1212, 5.2719, 5.1855, 5.1000, 5.0156, 5.1643,\n 5.3116, 5.2278, 5.1450, 5.0630, 5.2085, 5.3526, 5.4956, 5.6373, 5.5556,\n 5.6959, 5.8351, 5.7540, 5.8919, 6.0287, 6.1644, 6.0837, 6.2183, 6.3517,\n 6.4842, 6.4040, 6.5354, 6.4558, 6.3770, 6.5072, 6.4291, 6.5583, 6.6865,\n 6.6089, 6.5320, 6.6591, 6.7854, 6.7090, 6.8343, 6.9587, 7.0823, 7.0063,\n 7.1291, 7.0537, 6.9789, 7.1007, 7.0265, 7.1474, 7.2675, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.2857, 7.2136, 7.1421, 7.2596, 7.3765, 7.4927, 7.4215,\n 7.5369, 7.6517, 7.7658, 7.8793, 7.8084, 7.7380, 7.8507, 7.7808, 7.7114,\n 7.6424, 7.7544, 7.6859, 7.7971, 7.7291, 7.6615, 7.5944, 7.5277, 7.4615,\n 7.5719, 7.5061, 7.4407, 7.3758, 7.4853, 7.5944, 7.7028, 7.8107, 7.9181,\n 7.8533, 7.7889, 7.8956, 7.8316, 7.7679, 7.8740, 7.8107, 7.7478, 7.6853,\n 7.7907, 7.7285, 7.8333, 7.7715, 7.7099, 7.6488, 7.5880, 7.6922, 7.6317,\n 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: While it's probably true that democracies are unlikely to go to war unless they're attacked, sometimes they are the first to take the offensive.\nHypothesis: Democracies probably won't go to war unless someone attacks them on their soil\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.2599, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.7150, 1.6498, 1.5852,\n 1.7467, 1.6823, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.4757,\n 1.6271, 1.7772, 1.7179, 1.8665, 2.0140, 2.1602, 2.3054, 2.2454,\n 2.1858, 2.1268, 2.0682, 2.0101, 1.9524, 2.0948, 2.0373, 2.1783,\n 2.1210, 2.0642, 2.2037, 2.1470, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.0068, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 2.0071, 2.1401, 2.2723, 2.2188, 2.3500, 2.4803, 2.6099, 2.7386,\n 2.6846, 2.6309, 2.5776, 2.5247, 2.4721, 2.4198, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.6414, 2.5898, 2.7137, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.4553, 2.5754, 2.6949, 2.6458, 2.7644, 2.8825, 3.0000,\n 3.1169, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.0990, 9.2418, 9.3831, 9.5229, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.6723, 11.7838,\n 11.8944, 12.0044, 11.8982, 12.0077, 12.1164, 12.2244, 12.1200, 12.2275,\n 12.1244, 12.2314, 12.3377, 12.2360, 12.3419, 12.4471, 12.3468, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.7755, 12.8766, 12.9771, 13.0771, 12.9820, 13.0815, 12.9874, 13.0866,\n 13.1852, 13.0922, 13.1905, 13.2882, 13.1962, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.4021, 13.4977, 13.5929, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.7926, 13.8857, 13.7986, 13.8914, 13.9838, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.5479, 14.6362, 14.7242, 14.8119,\n 14.7293, 14.8167, 14.7348, 14.8219, 14.9086, 14.8274, 14.9139, 15.0000,\n 14.9195, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But to you, who know the truth, I propose to read certain passages which will throw some light on the extraordinary mentality of this great man.\" He opened the book, and turned the thin pages.\nHypothesis: There is no information on the mentality of the man, extraordinary or not, contained within the thin-paged book. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.8520, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.6433, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.9272, 1.0565, 1.0096, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 1.0328, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.1461, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.2752, 1.3950, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "76.4%", + "z-score": "16.7", + "p value": "3.39e-63", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.4678, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.1024, 10.2283, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.6631, 11.5519, 11.6652, 11.5556, 11.6683, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 11.8982, 12.0077, 11.9029, 11.7992, 11.9083, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.3710, 13.2753, 13.1806, 13.2791,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 13.8615,\n 13.9561, 13.8642, 13.9585, 14.0524, 14.1458, 14.2388, 14.3313, 14.4234,\n 14.5150, 14.6062, 14.6970, 14.7874, 14.8773, 14.9669, 15.0560, 14.9666,\n 15.0555, 15.1440, 15.2321, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.6653, 15.7509, 15.8362, 15.9211, 16.0057, 15.9193, 16.0036,\n 15.9179, 16.0020, 15.9170, 16.0009, 16.0845, 16.1678, 16.2507, 16.3333,\n 16.4156, 16.4976, 16.5793, 16.4957, 16.5772, 16.6584, 16.7393])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah i mean just when uh the they military paid for her education\nHypothesis: The military didn't pay for her education.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.8%", + "z-score": "4.45", + "p value": "4.22e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.3426, 0.5443,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.3641, 1.5492,\n 1.7321, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.8257, 1.9973, 2.1669, 2.3349, 2.5011, 2.6656, 2.5927,\n 2.5207, 2.4495, 2.6112, 2.7713, 2.9299, 3.0870, 3.2426, 3.1704,\n 3.3243, 3.2525, 3.1814, 3.1111, 3.0415, 2.9726, 2.9044, 3.0551,\n 2.9872, 3.1363, 3.0688, 3.0019, 3.1492, 3.0827, 3.0168, 2.9515,\n 3.0967, 3.2408, 3.1755, 3.1109, 3.0467, 2.9832, 3.1251, 3.2660,\n 3.2025, 3.1395, 3.2788, 3.2161, 3.1539, 3.2918, 3.2299, 3.1685,\n 3.1076, 3.0471, 2.9872, 2.9277, 2.8687, 3.0039, 2.9451, 2.8868,\n 2.8288, 2.9625, 2.9048, 2.8475, 2.9798, 2.9227, 2.8660, 2.8098,\n 2.7539, 2.8845, 2.8288, 2.7735, 2.7186, 2.6640, 2.6099, 2.5560,\n 2.5026, 2.6309, 2.5776, 2.7050, 2.8316, 2.9575, 3.0827, 3.2071,\n 3.3309, 3.2768, 3.2230, 3.1696, 3.2921, 3.4140, 3.5351, 3.6556,\n 3.7755, 3.7216, 3.8406, 3.7869, 3.7335, 3.6805, 3.6277, 3.5753,\n 3.6929, 3.8100, 3.9265, 4.0423, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.1143, 4.2280, 4.3412, 4.4538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "153", + "Fraction of T in Greenlist": "76.9%", + "z-score": "16.9", + "p value": "2.14e-64", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.8015, 9.6719, 9.8058, 9.9384, 10.0698,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.8321,\n 10.7098, 10.8327, 10.9546, 11.0755, 11.1954, 11.0761, 11.1954, 11.0780,\n 11.1966, 11.3143, 11.4311, 11.3161, 11.4323, 11.5476, 11.6620, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.3289, 12.4370, 12.3289, 12.4365, 12.5434, 12.6496, 12.5434, 12.6491,\n 12.7542, 12.8586, 12.9624, 13.0656, 13.1681, 13.2701, 13.1665, 13.2681,\n 13.3690, 13.4694, 13.5693, 13.4675, 13.5670, 13.4664, 13.5655, 13.6640,\n 13.7621, 13.6630, 13.7606, 13.8578, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.3360, 14.2390, 14.3333, 14.4272, 14.5206, 14.6135, 14.5181, 14.6107,\n 14.5162, 14.6086, 14.7005, 14.7920, 14.6987, 14.7899, 14.8807, 14.9711,\n 15.0610, 15.1505, 15.2397, 15.3284, 15.2369, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.4983, 15.5853, 15.4959, 15.5828, 15.6692, 15.7553, 15.6670,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.2607, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.4205, 16.5028, 16.4178, 16.5000,\n 16.5819, 16.6634, 16.5793, 16.6607, 16.7417, 16.8225, 16.9030])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: but uh these guys were actually on the road uh two thousand miles from from home when they had to file their uh their final exams and send them in\nHypothesis: These men filed their midterm exams from home. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.0290, 3.2577, 3.4816, 3.3566, 3.2348, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.3147, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.2796, 3.4743, 3.6662, 3.8552, 4.0415,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.1008, 4.0024, 3.9056, 4.0825,\n 3.9869, 3.8927, 4.0667, 4.2385, 4.4083, 4.5760, 4.4820, 4.6476,\n 4.8113, 4.7181, 4.6262, 4.5356, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.5899, 4.5035, 4.6603, 4.5747, 4.4901, 4.4066, 4.3241, 4.2426,\n 4.1621, 4.3158, 4.4680, 4.6188, 4.7682, 4.6876, 4.8355, 4.9820,\n 5.1273, 5.0469, 4.9675, 4.8889, 5.0323, 4.9543, 5.0964, 5.0190,\n 4.9424, 4.8666, 4.7916, 4.7173, 4.8572, 4.9960, 5.1338, 5.0596,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406, 4.9695, 4.8990,\n 4.8291, 4.9624, 5.0948, 5.2262, 5.1564, 5.2868, 5.4163, 5.5448,\n 5.4752, 5.6028, 5.5336, 5.4650, 5.5915, 5.5233, 5.4557, 5.3886,\n 5.3220, 5.2560, 5.1905, 5.1255, 5.0609, 5.1854, 5.1213, 5.0576,\n 4.9943, 4.9316, 4.8693, 4.8074, 4.7460, 4.8687, 4.9908, 5.1121,\n 5.0507, 4.9897, 5.1100, 5.0494, 4.9891, 5.1085, 5.0485, 5.1671,\n 5.2850, 5.4023, 5.3423, 5.2827, 5.2235, 5.1647, 5.2809, 5.2223,\n 5.1642, 5.1064, 5.0489, 4.9918, 4.9351, 4.8787, 4.8227, 4.9373,\n 4.8815, 4.8260, 4.9397, 5.0529, 5.1655, 5.1100, 5.0548, 5.0000,\n 4.9455, 4.8913, 4.8374, 4.7838, 4.7305, 4.8416, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 8.0829, 8.2151, 8.1176, 8.0212, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 8.1291, 8.0370, 8.1651, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.3976, 8.3093, 8.2219,\n 8.3453, 8.2588, 8.1731, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.8131, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.5833, 10.6817, 10.6111, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.8184, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But you have to have money to save it, and not many couples with young children have the luxury of tucking away $2,000 apiece annually for their Golden Years.\nHypothesis: Not many couples with kids can save up for retirement.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -2.0203, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.7894, -2.8245, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "149", + "Fraction of T in Greenlist": "74.9%", + "z-score": "16.2", + "p value": "1.15e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 9.8058, 9.9384, 10.0698,\n 10.1999, 10.0737, 10.2030, 10.0791, 10.2075, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.8379, 10.9585, 11.0780,\n 11.1966, 11.3143, 11.1990, 11.3161, 11.4323, 11.5476, 11.4345, 11.5492,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.0044, 12.1136, 12.2221, 12.3299, 12.4370, 12.5434, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.6508, 12.7550, 12.6529, 12.7567, 12.8598,\n 12.9624, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.6626, 13.5654, 13.6629, 13.7599, 13.8564,\n 13.7606, 13.8567, 13.7619, 13.8577, 13.9530, 14.0479, 13.9543, 14.0488,\n 14.1428, 14.2364, 14.3295, 14.2373, 14.3301, 14.4225, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.7874, 14.8773, 14.7877, 14.8773, 14.7885,\n 14.8779, 14.9669, 15.0555, 14.9677, 15.0560, 15.1440, 15.2316, 15.3188,\n 15.2321, 15.3191, 15.4057, 15.4919, 15.5778, 15.6634, 15.5778, 15.6631,\n 15.7481, 15.8327, 15.7481, 15.8325, 15.7485, 15.8327, 15.9165, 16.0000,\n 15.9169, 16.0002, 16.0832, 16.1658, 16.2481, 16.1660, 16.2481])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: and uh as a matter of fact he's a draft dodger\nHypothesis: They dodged the draft, I'll have you know.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "81", + "Fraction of T in Greenlist": "40.7%", + "z-score": "5.12", + "p value": "1.56e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.7852, 2.6681, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.6713, 2.8804, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 3.1787, 3.0796, 3.2733, 3.4641,\n 3.6522, 3.5533, 3.4562, 3.3607, 3.2667, 3.1743, 3.0833, 3.2660,\n 3.4463, 3.3558, 3.5333, 3.4438, 3.3556, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.6979, 3.8657, 3.7796, 3.9452, 3.8600, 3.7758, 3.6927,\n 3.8555, 4.0166, 3.9340, 3.8523, 3.7717, 3.9302, 3.8503, 3.7712,\n 3.6931, 3.8492, 4.0038, 3.9260, 3.8490, 3.7730, 3.9253, 4.0762,\n 4.0004, 4.1497, 4.0745, 4.2222, 4.1475, 4.2938, 4.2196, 4.1461,\n 4.0734, 4.2178, 4.1455, 4.0740, 4.0032, 4.1457, 4.0753, 4.0056,\n 4.1464, 4.0771, 4.0085, 4.1478, 4.0795, 4.0119, 3.9448, 4.0825,\n 4.0158, 3.9497, 4.0859, 4.2212, 4.3554, 4.2893, 4.2237, 4.1586,\n 4.2914, 4.4233, 4.3583, 4.4891, 4.6191, 4.7481, 4.6832, 4.6188,\n 4.5549, 4.4915, 4.4286, 4.3661, 4.4933, 4.6198, 4.5575, 4.6829,\n 4.6209, 4.5594, 4.6838, 4.8074, 4.7460, 4.6850, 4.8076, 4.9295,\n 4.8687, 4.9897, 4.9292, 4.8690, 4.8093, 4.9292, 5.0485, 4.9889,\n 4.9297, 4.8709, 4.9891, 4.9305, 4.8724, 4.8146, 4.9317, 5.0483,\n 4.9906, 4.9333, 4.8763, 4.9918, 5.1068, 5.0499, 5.1642, 5.1075,\n 5.2211, 5.1647, 5.2775, 5.2213, 5.1655, 5.1100, 5.2220, 5.1667,\n 5.1117, 5.0571, 5.1681, 5.1137, 5.0595, 5.1698, 5.1159])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.0029, 9.9085,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.7157, 11.6311, 11.5471, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 13.0608, 12.9845, 12.9087, 12.8333,\n 12.7585, 12.6841, 12.7756, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: 'Publicity.' Lincoln removed his great hat, making a small show of dusting it off.\nHypothesis: Lincoln took his hat off.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.5133, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.4580, 1.6186, 1.7778, 1.9355, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 2.0739, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.9242, 1.8664, 1.8091, 1.7522, 1.8953, 1.8385, 1.9803,\n 2.1210, 2.0642, 2.0078, 2.1470, 2.0907, 2.0349, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.2188, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.2680, 2.3967, 2.3443, 2.4721, 2.5990, 2.5466, 2.4944,\n 2.4426, 2.5683, 2.5166, 2.4653, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.8333,\n 1.9533, 2.0726, 2.1913, 2.1444, 2.0979, 2.2156, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "165", + "Fraction of T in Greenlist": "82.9%", + "z-score": "18.9", + "p value": "1.06e-79", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 9.7312, 9.8754,\n 10.0178, 10.1585, 10.2976, 10.4350, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.8133, 10.9443, 10.7918, 10.9222, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.2848, 11.4097, 11.5333, 11.6559, 11.7773, 11.8977, 11.7572, 11.8771,\n 11.9961, 12.1140, 12.2309, 12.3468, 12.2114, 12.3269, 12.4416, 12.5553,\n 12.6682, 12.7802, 12.6493, 12.7609, 12.8717, 12.9817, 13.0909, 13.1993,\n 13.0725, 13.1806, 13.2879, 13.3945, 13.5004, 13.6056, 13.4825, 13.5873,\n 13.6915, 13.7950, 13.8978, 14.0000, 13.8803, 13.9822, 14.0835, 14.1842,\n 14.2842, 14.3836, 14.2671, 14.3663, 14.4649, 14.5629, 14.6604, 14.7573,\n 14.6437, 14.7404, 14.8365, 14.9321, 15.0272, 15.1217, 15.0108, 15.1052,\n 15.1990, 15.2924, 15.3852, 15.4776, 15.3692, 15.4614, 15.5531, 15.6443,\n 15.7351, 15.8254, 15.7194, 15.8096, 15.8993, 15.9886, 16.0774, 16.1658,\n 16.0620, 16.1503, 16.2381, 16.3255, 16.4125, 16.4992, 16.3975, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.7262, 16.8109, 16.8953, 16.9794,\n 17.0631, 17.1464, 17.0485, 17.1317, 17.2146, 17.2971, 17.3792, 17.4611,\n 17.3649, 17.4466, 17.5280, 17.6090, 17.6897, 17.7701, 17.6756, 17.7559,\n 17.8359, 17.9155, 17.9949, 18.0739, 17.9810, 18.0599, 18.1386, 18.2169,\n 18.2949, 18.3727, 18.2813, 18.3589, 18.4363, 18.5133, 18.5901, 18.6667,\n 18.5767, 18.6531, 18.7292, 18.8051, 18.8807, 18.9561, 18.8675])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In the stock market, however, the damage can get much worse.\nHypothesis: The stock market can experience much worse damage. \nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "57.6%", + "z-score": "7.22", + "p value": "2.54e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.7326, 11.6487, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.9669, 12.0660, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.3967, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Missouri was asked to continue its planning efforts and file a supplemental planning report with LSC on or before October 1, 1999.\nHypothesis: Missouri was happy to continue it's planning efforts. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.2916, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.6098, 2.8006, 2.9887, 2.8983, 2.8093, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.7585, 2.9329, 2.8518, 3.0237, 2.9433, 3.1129, 3.0330, 3.2004,\n 3.1211, 3.0429, 2.9656, 3.1300, 3.0533, 2.9775, 2.9025, 3.0641,\n 2.9897, 2.9161, 3.0754, 3.0022, 2.9299, 3.0870, 3.2426, 3.1704,\n 3.0989, 3.2525, 3.1814, 3.1111, 3.2627, 3.1928, 3.3428, 3.2733,\n 3.2044, 3.3526, 3.4995, 3.4308, 3.3627, 3.2953, 3.4402, 3.3731,\n 3.3066, 3.4499, 3.3838, 3.5256, 3.4599, 3.3947, 3.5350, 3.6742,\n 3.6091, 3.5446, 3.6824, 3.8191, 3.9549, 3.8903, 3.8262, 3.9606,\n 3.8968, 3.8335, 3.7707, 3.7084, 3.8411, 3.7791, 3.7176, 3.6566,\n 3.7878, 3.9181, 3.8571, 3.9865, 3.9258, 4.0541, 3.9936, 4.1210,\n 4.0608, 4.0011, 3.9418, 4.0678, 4.0087, 3.9501, 3.8919, 4.0166,\n 3.9586, 3.9010, 4.0247, 3.9673, 3.9104, 4.0330, 4.1549, 4.0980,\n 4.0415, 4.1624, 4.1061, 4.0501, 4.1700, 4.1143, 4.2334, 4.1779,\n 4.1226, 4.2409, 4.3585, 4.3033, 4.2485, 4.1940, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.2642, 4.3792, 4.3256, 4.2723, 4.3864, 4.5000,\n 4.4468, 4.3938, 4.5066, 4.6188, 4.7305, 4.6775, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.8780, 7.6800, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.0018, 7.8320, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.1742, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.0498, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.5027, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 10.9229, 11.0368, 10.9355, 10.8353, 10.9488, 10.8498, 10.9626,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 11.8427, 11.9455, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 13.0903, 13.0067,\n 12.9238, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.2299,\n 13.1491, 13.0688, 13.1617, 13.0821, 13.1746, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.3060, 13.3967, 13.3196, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.3615, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: He hadn't seen even pictures of such things since the few silent movies run in some of the little art theaters.\nHypothesis: He had recently seen pictures depicting those things.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.9215, 2.1412, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.0870, 2.2916, 2.1997, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.4327, 2.6222, 2.8093, 2.7217,\n 2.6354, 2.8189, 2.7333, 2.6491, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.5019, 2.4228, 2.3448, 2.5198, 2.4423, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 2.1669, 2.3349, 2.5011, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.7713, 2.7001, 2.6296, 2.7875, 2.7175,\n 2.8735, 2.8039, 2.7349, 2.6667, 2.8203, 2.7524, 2.9044, 3.0551,\n 3.2044, 3.1363, 3.0688, 3.2163, 3.1492, 3.2953, 3.2285, 3.1623,\n 3.0967, 3.2408, 3.1755, 3.1109, 3.2533, 3.1889, 3.1251, 3.0619,\n 2.9991, 2.9369, 2.8753, 2.8141, 2.7534, 2.6933, 2.6336, 2.7724,\n 2.7129, 2.6540, 2.5954, 2.7325, 2.6742, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.3891, 2.3333, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.7970, 1.7454, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.8999, 2.0212, 1.9738, 1.9267, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.9795, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.1%", + "z-score": "11.1", + "p value": "8e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.2496,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.6976, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.5714, 10.4898,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 11.0414, 10.9685, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.1218, 11.0521, 11.1475, 11.0782])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Look, there's a legend here.\nHypothesis: See, there is a well known hero here.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428,\n 1.1138, 1.0498, 0.9864, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.3166, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 0.8208, 0.9623,\n 1.1028, 1.2423, 1.1905, 1.1390, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.2472,\n 1.3768, 1.5055, 1.4570, 1.4087, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.3771, 1.3303, 1.2839, 1.4084, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.6843, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.6208, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 8.0829,\n 7.9489, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 8.9355, 9.0711, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.7211, 10.6218, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.4857,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 11.9319,\n 11.8427, 11.9455, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.5526, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 13.9332, 13.8564, 13.9446, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The Celts arrived in the wake of the Roman withdrawal at the end of the fourth century.\nHypothesis: At the end of the fourth century was when baked goods flourished.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.3402, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -0.8704, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.5101, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.2955, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.1240, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.6201,\n 13.5408, 13.6313, 13.7215, 13.6429, 13.5647, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.9332, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah and how about how about like on the weekends do you do sports or do you go out\nHypothesis: No one plays sports on the weekend.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.5275, 1.7321,\n 1.9335, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.9333, 1.8543, 1.7765, 1.6997, 1.8838, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.0158, 2.1918, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856,\n 2.0517, 2.2162, 2.3791, 2.3094, 2.2405, 2.1723, 2.3324, 2.4910,\n 2.4228, 2.3552, 2.2884, 2.2222, 2.3779, 2.3120, 2.4660, 2.4004,\n 2.3354, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 1.0105, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.9858, 0.9382, 0.8909,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.2525, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 8.9169, 8.7986, 8.6820, 8.8192, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 10.8082, 10.9229, 11.0368, 10.9355, 10.8353, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.2564, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.9628,\n 13.0590, 12.9732, 12.8881, 12.9840, 13.0795, 12.9952, 12.9116, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.3343, 13.4263, 13.3463, 13.2668, 13.1878, 13.2796,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.4100, 13.5000,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Harlem was our first permanent office, he said. \nHypothesis: Harlem did a great job \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.8477, 2.0455, 1.9604, 2.1546, 2.0702, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 2.0656,\n 1.9887, 1.9127, 2.0913, 2.2678, 2.1918, 2.1167, 2.2902, 2.4618,\n 2.3868, 2.5560, 2.4814, 2.4077, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.7713, 2.7001, 2.6296, 2.5600, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.5991, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.7037, 2.8534, 2.7875, 2.9357, 2.8701, 3.0168, 2.9515,\n 3.0967, 3.0317, 2.9673, 2.9035, 2.8402, 2.7775, 2.7153, 2.8577,\n 2.7958, 2.7344, 2.8753, 3.0151, 2.9537, 2.8928, 3.0311, 3.1685,\n 3.1076, 3.2437, 3.3789, 3.3181, 3.4521, 3.5853, 3.5245, 3.4641,\n 3.5960, 3.7270, 3.8571, 3.7966, 3.9258, 4.0541, 3.9936, 3.9337,\n 3.8741, 3.8150, 3.9418, 3.8829, 3.8244, 3.7664, 3.8919, 4.0166,\n 3.9586, 4.0825, 4.0247, 4.1477, 4.0901, 4.2122, 4.1549, 4.0980,\n 4.0415, 3.9853, 3.9294, 3.8740, 3.9945, 3.9392, 3.8843, 4.0038,\n 4.1226, 4.0678, 4.0132, 4.1312, 4.2485, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.1487, 4.0953, 4.2108, 4.3256, 4.2723, 4.2193, 4.3333,\n 4.4468, 4.3938, 4.5066, 4.6188, 4.5659, 4.5134, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.5366, 10.6397, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.8170, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.1637, 11.0913, 11.1883, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But when the cushion is spent in a year or two, or when the next recession arrives, the disintermediating voters will find themselves playing the roles of budget analysts and tax wonks.\nHypothesis: The cushion will likely be spent in under two years.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.0451, 3.9158, 3.7897, 4.0000,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.0817, 2.9814, 3.1787, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.6098, 2.5205, 2.7107, 2.8983, 3.0833, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.9140, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.5019, 2.4228, 2.3448, 2.5198, 2.6928, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.3126, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.9165, 0.8704,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.6%", + "z-score": "10.9", + "p value": "4.93e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.5144, 7.3131, 7.1187, 7.3054, 7.1187, 7.3030,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.5340, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.2496,\n 8.1483, 8.0483, 7.9495, 8.0829, 7.9853, 8.1176, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.1414, 9.0595, 8.9783, 9.0923, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 9.8433, 9.9481, 10.0523, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.2273, 10.3280, 10.4281, 10.3566, 10.4563, 10.5556,\n 10.4846, 10.4140, 10.5128, 10.6111, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.8184, 10.9141])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Oh, what a fool I feel! \nHypothesis: I am beyond proud.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.4027, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.9733, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 1.0593, 1.0050, 0.9512, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.8866, 1.0106, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.7%", + "z-score": "15.5", + "p value": "1.5e-54", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.7337, 6.9282, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.1234, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.5655,\n 11.6772, 11.7881, 11.8982, 12.0077, 11.9029, 11.7992, 11.9083, 12.0167,\n 12.1244, 12.2314, 12.1295, 12.2360, 12.3419, 12.4471, 12.3468, 12.2474,\n 12.1491, 12.2541, 12.1568, 12.2615, 12.3655, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.6939, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.6287, 12.5394, 12.4508, 12.5503,\n 12.4625, 12.3754, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 14.8991, 14.9860, 15.0726, 15.1587, 15.2446, 15.3301, 15.2481, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.5060, 15.4254, 15.5095])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: She has exchanged a hollow life for a heightened life, and has tried to comprehend all its turns, get its possibilities.\nHypothesis: She has chose to live a hollow life.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.0929, 2.9856, 3.1918, 3.0861,\n 3.2883, 3.1840, 3.0817, 2.9814, 2.8830, 2.7863, 2.9823, 3.1754,\n 3.3657, 3.2691, 3.4562, 3.3607, 3.2667, 3.1743, 3.0833, 2.9938,\n 2.9057, 3.0873, 3.0000, 2.9140, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.7585, 2.6778, 2.5983, 2.7717, 2.6928, 2.6148, 2.7854, 2.7080,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.9720, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.8682, 1.8071, 1.7465, 1.8974,\n 1.8370, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.2257, 1.1746, 1.1239,\n 1.2603, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.3970, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.4094, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.0779, 9.9813, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.5841, 10.4956,\n 10.4079, 10.3209, 10.4307, 10.3445, 10.2592, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.0940, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 11.9147, 12.0114,\n 11.9340, 11.8571, 11.7808, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: well the first thing for me is i wonder i see a couple of different ways of talking about what privacy is um if privacy is something that disturbs your private state i mean an invasion of privacy is something that disturbs your private state that's one thing and if privacy is something that comes into your private state and extracts information from it in other words finds something out about you that's another and the first kind of invasion of the first type of privacy seems invaded to me in very much everyday in this country but in the second type at least overtly uh where someone comes in and uh finds out information about you that should be private uh does not seem uh um obviously everyday\nHypothesis: All invasions of privacy should be severely punished, because it will teach the criminals that it is not worth doing.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.4407, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.4581, 11.5549, 11.6514, 11.5771,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641, 3.2206, 3.5382,\n 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140, 4.5033, 4.3027, 4.5556,\n 4.8008, 4.6101, 4.8488, 4.6663, 4.4907, 4.7237, 4.5547, 4.7819, 5.0037,\n 5.2204, 5.0576, 4.8999, 5.1121, 5.3199, 5.1671, 5.3708, 5.2223, 5.0779,\n 5.2778, 5.1371, 5.3333, 5.5261, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140,\n 5.6830, 5.5549, 5.7354, 5.6099, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919,\n 6.0622, 5.9438, 5.8275, 5.9954, 6.1612, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968, 6.3509,\n 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008, 6.3035, 6.4510,\n 6.5970, 6.7416, 6.6454, 6.5504, 6.6935, 6.8354, 6.7414, 6.6486, 6.7890,\n 6.6973, 6.6066, 6.7456, 6.6559, 6.7937, 6.9303, 7.0657, 6.9768, 6.8889,\n 7.0231, 7.1563, 7.0692, 6.9830, 7.1149, 7.0296, 6.9451, 7.0759, 6.9923,\n 7.1220, 7.2508, 7.3786, 7.2956, 7.2134, 7.3402, 7.4661, 7.3845, 7.3037,\n 7.4286, 7.3485, 7.2691, 7.3930, 7.3143, 7.4373, 7.5595, 7.6808, 7.6026,\n 7.5251, 7.6456, 7.7653, 7.6883, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.9729, 7.8988, 7.8253, 7.9403, 8.0546, 7.9816,\n 7.9091, 8.0227, 7.9507, 7.8793, 7.9921, 7.9211, 8.0333, 8.1448, 8.2557,\n 8.1851, 8.1150, 8.2252, 8.3349, 8.2652, 8.1960, 8.3050, 8.2362, 8.1679,\n 8.2762, 8.2084, 8.3161, 8.4232, 8.5298, 8.4623, 8.3952, 8.5012, 8.6066,\n 8.5399, 8.4736, 8.5785, 8.5126, 8.4471, 8.5513, 8.4862, 8.5899, 8.6932,\n 8.7959, 8.7311, 8.6667, 8.7689, 8.8706, 8.8065, 8.7427, 8.8439, 8.7805,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: He had never felt better.\nHypothesis: The medicine he had taken had worked well.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "38.5%", + "z-score": "3.26", + "p value": "0.000552", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.3333,\n 1.5671, 1.7963, 2.0211, 1.9215, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.8189, 3.0000, 3.1789, 3.3556, 3.5301, 3.4427, 3.6148,\n 3.5283, 3.4429, 3.3587, 3.2757, 3.1937, 3.3619, 3.5282, 3.6927,\n 3.6107, 3.5298, 3.4498, 3.3708, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.3826, 3.3075, 3.2332, 3.3895, 3.3156, 3.4702, 3.3968,\n 3.3243, 3.2525, 3.1814, 3.3333, 3.2627])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 5.4611, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.7543, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.8667, 9.0068, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.7442, 9.8716, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 10.0692, 9.9640,\n 10.0855, 9.9817, 9.8792, 9.7778, 9.8987, 9.7986, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.5294, 9.6490, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.5156, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.1622, 12.2628, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.3455, 12.4430, 12.3603, 12.4575, 12.3754, 12.2940,\n 12.2132, 12.3100, 12.2298, 12.1502, 12.0712, 12.1677, 12.0893, 12.0114,\n 12.1076, 12.0302, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The campaigns seem to reach a new pool of contributors.\nHypothesis: New people chose to donate to the cause \nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.4407, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.0468,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.4581, 11.5549, 11.6514, 11.5771,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.4072, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 15.1690, 15.2619, 15.1556, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.7680, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.3700, 16.4561, 16.3575, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 16.9265,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.4770, 17.3854, 17.4660, 17.5464, 17.6264, 17.7061,\n 17.7856, 17.8647, 17.9435, 18.0221, 18.1003, 18.0107, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The Data Warehousing Institute provides education and training in the data warehousing and business intelligence industry.\nHypothesis: Business intelligence industry is a new and promising field of study.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 3.6566, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.4550, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.3923, 10.2833, 10.1756, 10.0692, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.0000, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.2975, 10.4103,\n 10.3191, 10.2287, 10.1391, 10.2514, 10.3630, 10.2743, 10.3853, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.8204, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.2992, 11.2178, 11.1370,\n 11.0569, 10.9773, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.7000, 11.7980, 11.7200, 11.6425, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.8771, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: the net cost of operations.\nHypothesis: The gross cost.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "8.0%", + "z-score": "-5.53", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.3301, -2.3851, -2.4394, -2.4930, -2.5459, -2.5981,\n -2.6496, -2.7005, -2.7508, -2.8006, -2.8497, -2.8983, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.2863, -3.3288, -3.3708, -3.4125, -3.4538, -3.4949, -3.5355,\n -3.5759, -3.6159, -3.6556, -3.6950, -3.5044, -3.5443, -3.5839, -3.6233,\n -3.6623, -3.7011, -3.7396, -3.7778, -3.8157, -3.8534, -3.8908, -3.9279,\n -3.9648, -4.0015, -4.0379, -3.8596, -3.8965, -3.9331, -3.9694, -4.0056,\n -4.0415, -4.0771, -4.1126, -4.1478, -4.1828, -4.2176, -4.2522, -4.2866,\n -4.3208, -4.3548, -4.1868, -4.2212, -4.2553, -4.2893, -4.3231, -4.3566,\n -4.3900, -4.4233, -4.4563, -4.4891, -4.5218, -4.5543, -4.5866, -4.6188,\n -4.6508, -4.4915, -4.5238, -4.5560, -4.5879, -4.6198, -4.6514, -4.6829,\n -4.7143, -4.7455, -4.7765, -4.8074, -4.8381, -4.8687, -4.8992, -4.9295,\n -4.7777, -4.8083, -4.8387, -4.8690, -4.8992, -4.9292, -4.9591, -4.9889,\n -5.0185, -5.0480, -5.0774, -5.1066, -5.1357, -5.1647, -5.1936, -5.0483,\n -5.0774, -5.1064, -5.1352, -5.1640, -5.1926, -5.2211, -5.2495, -5.2778,\n -5.3060, -5.3340, -5.3619, -5.3898, -5.4175, -5.4451, -5.3055, -5.3333,\n -5.3611, -5.3887, -5.4162, -5.4436, -5.4709, -5.4981, -5.5252])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 7.8766, 8.0358, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.5876, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.0107, 9.8995,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 11.9060, 12.0096, 11.9187, 11.8287, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.5615, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.8769, 12.9732, 12.8881, 12.8037, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.9007, 13.8222, 13.7442, 13.8333,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.7801, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Credibility is a vital factor, and Jim Lehrer does, indeed, have it.\nHypothesis: Jim Lehrer has no credibility whatsoever.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.48", + "p value": "0.0688", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 0.9631, 0.8980, 1.0719, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.0598, 1.1942, 1.1447, 1.0954,\n 1.2285, 1.3608, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.2657, 1.3926,\n 1.3453, 1.2982, 1.4241, 1.3771, 1.3303, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.4093, 1.5298, 1.4846])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "158", + "Fraction of T in Greenlist": "79.4%", + "z-score": "17.7", + "p value": "1.43e-70", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.3255, 6.5354, 6.7402, 6.9402,\n 6.7338, 6.5350, 6.7337, 6.9282, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 8.9567, 9.1084, 9.2582,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.3881, 9.2388, 9.3834, 9.5263,\n 9.3811, 9.2387, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.4442, 11.3196, 11.4388, 11.5570, 11.4349, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.8953, 12.0089, 12.1216, 12.2336, 12.3447, 12.4550,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.9955, 12.8813, 12.9875, 13.0931,\n 12.9807, 13.0859, 13.1904, 13.2942, 13.1839, 13.0748, 13.1785, 13.2816,\n 13.1741, 13.0677, 13.1707, 13.2730, 13.3747, 13.4758, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.4562,\n 14.5513, 14.4493, 14.5442, 14.6385, 14.5379, 14.6319, 14.7255, 14.8187,\n 14.7195, 14.6212, 14.7143, 14.8069, 14.7098, 14.6135, 14.7060, 14.7981,\n 14.8896, 14.9808, 15.0715, 15.1618, 15.2517, 15.3411, 15.4302, 15.5188,\n 15.6070, 15.6949, 15.7823, 15.8694, 15.9561, 15.8631, 15.9496, 16.0357,\n 15.9437, 16.0296, 16.1151, 16.2003, 16.1095, 16.0194, 16.1045, 16.1892,\n 16.1000, 16.0115, 16.0961, 16.1805, 16.2644, 16.3481, 16.4314, 16.5144,\n 16.5970, 16.6793, 16.7614, 16.8430, 16.9244, 17.0055, 17.0862, 17.1667,\n 17.2468, 17.3267, 17.4062, 17.4855, 17.5644, 17.6431, 17.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Tuppence rose.\nHypothesis: Tuppence floated into the air.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.8240, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.9604, 1.8766, 2.0702, 1.9870, 1.9052,\n 2.0948, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 2.1436, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.1669, 2.0954, 2.2629, 2.4286, 2.3570,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.4703, 2.4010, 2.3324, 2.4910,\n 2.4228, 2.3552, 2.5117, 2.6667, 2.5991, 2.5322, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.4227, 2.3586, 2.5087, 2.4449, 2.5934, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.4887, 2.4271, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.4121, 2.5532, 2.4938, 2.6336, 2.5744,\n 2.5156, 2.6540, 2.7913, 2.7325, 2.8687, 2.8101, 2.7520, 2.8868,\n 3.0206, 2.9625, 2.9048, 3.0373, 2.9798, 2.9227, 2.8660, 2.8098,\n 2.9406, 2.8845, 2.8288, 2.9584, 2.9029, 2.8478, 2.7930, 2.7386,\n 2.8666, 2.8124, 2.9394, 2.8853, 2.8316, 2.7783, 2.7253, 2.6726,\n 2.7979, 2.9225, 2.8698, 2.9935, 2.9410, 2.8887, 2.8368, 2.9593,\n 2.9076, 2.8561, 2.9776, 2.9263, 2.8752, 2.9957, 3.1156, 3.0645,\n 3.0138, 3.1327, 3.0821, 3.0317, 2.9817, 2.9320, 2.8825, 3.0000,\n 2.9507, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "164", + "Fraction of T in Greenlist": "82.4%", + "z-score": "18.7", + "p value": "2.31e-78", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.9455, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.3831, 9.5229, 9.3871, 9.5258,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.3196, 11.4388, 11.3163, 11.4349, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.7809, 11.8953, 12.0089, 12.1216, 12.2336, 12.3447, 12.4550,\n 12.5646, 12.6735, 12.7815, 12.8889, 12.7743, 12.8813, 12.9875, 13.0931,\n 13.1979, 13.3022, 13.1904, 13.2942, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.5940, 13.6950, 13.7953, 13.8952, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.4725, 14.5682, 14.6634, 14.7580, 14.8522,\n 14.9459, 14.8425, 14.9359, 14.8337, 14.9269, 15.0195, 15.1118, 15.2036,\n 15.2949, 15.3858, 15.4762, 15.5662, 15.6558, 15.7449, 15.8336, 15.9220,\n 16.0099, 16.0974, 15.9990, 16.0863, 16.1732, 16.2598, 16.3459, 16.4317,\n 16.3351, 16.4207, 16.3250, 16.4104, 16.4954, 16.5801, 16.6644, 16.7484,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.3962, 17.4770, 17.5575, 17.6377, 17.7176, 17.7971, 17.7061,\n 17.7856, 17.6954, 17.7746, 17.8536, 17.9323, 18.0107, 18.0888, 18.1667,\n 18.2442, 18.3215, 18.3985, 18.4752, 18.5517, 18.6278, 18.7038])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: From the corner of his eye he saw Jamus look over the broken mare.\nHypothesis: Jamus looked over the mare.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "80", + "Fraction of T in Greenlist": "40.2%", + "z-score": "4.95", + "p value": "3.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.9869, 0.9152, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.6854, 1.6164, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.8475, 2.0107, 1.9437, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.2884, 2.2222, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.3354, 2.4874, 2.4227, 2.5731, 2.5087, 2.6575, 2.5934, 2.7406,\n 2.6768, 2.8226, 2.7591, 2.9035, 2.8402, 2.7775, 2.7153, 2.8577,\n 2.9991, 2.9369, 3.0770, 3.0151, 3.1539, 3.0923, 3.2299, 3.1685,\n 3.3049, 3.2437, 3.3789, 3.3181, 3.2577, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.5359, 3.4762, 3.6068, 3.5474, 3.6770, 3.6178, 3.7463,\n 3.6874, 3.8150, 3.7563, 3.6980, 3.6401, 3.7664, 3.8919, 3.8341,\n 3.9586, 3.9010, 4.0247, 3.9673, 4.0901, 4.0330, 4.1549, 4.0980,\n 4.2191, 4.1624, 4.1061, 4.0501, 4.1700, 4.2893, 4.2334, 4.3519,\n 4.2962, 4.4140, 4.3585, 4.4754, 4.4202, 4.5364, 4.4813, 4.5968,\n 4.5419, 4.6567, 4.6020, 4.5476, 4.4936, 4.6074, 4.7206, 4.6667,\n 4.7792, 4.7255, 4.8374, 4.7838, 4.8950, 4.8416, 4.9522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.3944, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.3167, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.3060, 13.3967, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Even if you're the kind of traveler who likes to improvise and be adventurous, don't turn your nose up at the tourist offices.\nHypothesis: There's nothing worth seeing in the tourist offices.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "28.8%", + "z-score": "0.981", + "p value": "0.163", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.8995, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 7.8428, 7.7414, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.1176, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.5638, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.0661, 11.1648, 11.0904, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.7169, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Similarly, OIM revised the electronic Grant Renewal Application to accommodate new information sought by LSC and to ensure greater ease for users.\nHypothesis: Changes were made to the Grant Renewal Application to provide extra information to the LSC.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "22", + "Fraction of T in Greenlist": "21.8%", + "z-score": "-0.747", + "p value": "0.772", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -0.8553, -0.6537, -0.7145, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "157", + "Fraction of T in Greenlist": "78.9%", + "z-score": "17.6", + "p value": "2.59e-69", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.2598, 6.4902, 6.1968, 6.4254, 6.6469, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.6120, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.2577, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.8926, 9.0520, 8.8648, 9.0233, 9.1795, 9.3333,\n 9.1551, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 10.0426, 10.1840,\n 10.3237, 10.4618, 10.2976, 10.4350, 10.5709, 10.7052, 10.5472, 10.6810,\n 10.8133, 10.9443, 11.0739, 10.9222, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.6781, 11.5333, 11.6559, 11.7773, 11.8977, 11.7572, 11.6189,\n 11.7395, 11.8589, 11.9774, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.6493, 12.7609, 12.6322, 12.7435, 12.8540, 12.9636,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.2549, 13.3609,\n 13.4661, 13.5707, 13.6746, 13.7778, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 13.9251, 14.0253, 14.1248,\n 14.2238, 14.3222, 14.4200, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 14.8629, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.3158, 15.4072, 15.4980, 15.5885,\n 15.4867, 15.3858, 15.4762, 15.5662, 15.6558, 15.5563, 15.6457, 15.7346,\n 15.8232, 15.9113, 15.9990, 16.0863, 15.9889, 16.0760, 16.1628, 16.2491,\n 16.1531, 16.2392, 16.3250, 16.4104, 16.4954, 16.4009, 16.4857, 16.5702,\n 16.6543, 16.7381, 16.8216, 16.9047, 16.8118, 16.8948, 16.9774, 17.0596,\n 16.9680, 16.8770, 16.9592, 17.0411, 17.1227, 17.0328, 17.1143, 17.1954,\n 17.2762, 17.3567, 17.4369, 17.3483, 17.4284, 17.5081, 17.5875, 17.5000,\n 17.5793, 17.6583, 17.5716, 17.4855, 17.5644, 17.4790, 17.5578])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: 'Dave Hanson, to whom nothing was impossible.' Well, we have a nearly impossible task: a task of engineering and building.\nHypothesis: This building job will be very difficult to complete.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.5092, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.3901, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.2572, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.6016, 9.7109, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.8170, 10.9176, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Still, it would be interesting to know. 109 Poirot looked at me very earnestly, and again shook his head. \nHypothesis: Poirot did not look at me.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, 0.1063, 0.2646, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.6983, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.4839, 7.6615, 7.8360, 8.0076, 8.1763, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.1684, 8.0139, 8.1742, 8.0238,\n 7.8766, 7.7326, 7.5916, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 7.9472, 8.0928, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.6747, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 11.8870, 11.8010,\n 11.9024, 11.8172, 11.7326, 11.6487, 11.5655, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.0532, 11.9741, 11.8956, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.5049, 12.4283,\n 12.3523, 12.4460, 12.3705, 12.4638, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.7017, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: But I'll take up my stand somewhere near, and when he comes out of the building I'll drop a handkerchief or something, and off you go!\"\nHypothesis: I want you to follow him, so watch for the signal that I give.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.89", + "p value": "0.97", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -1.8511, -1.8874])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 9.0060, 8.9178, 9.0370, 8.9496, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.0453, 8.9612, 9.0773, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.6814, 9.6016, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.0416,\n 9.9648, 10.0701, 9.9940, 10.0987, 10.2029, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.6271, 10.5556,\n 10.6544, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.0261, 10.9564, 11.0521, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: There are no shares of a stock that might someday come back, just piles of options as worthless as those shares of Cook's American Business Alliance.\nHypothesis: Cook's American Business Alliance caused shares of stock to come back.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 2.2269, 2.4495, 2.6679, 2.8823, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 1.9335, 2.1320, 2.3276, 2.5205, 2.7107, 2.8983, 3.0833, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.9140, 3.0924, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.4423, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 2.1669, 2.0954, 2.0247, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.9066, 1.8419, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.9009, 1.8385, 1.7767, 1.9298, 2.0817, 2.2323, 2.1700, 2.1082,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 2.1268, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.8411, 1.7864, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.9263, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 2.0083,\n 1.9566, 2.0866, 2.2159, 2.1640, 2.1125, 2.2406, 2.3679, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 1.8935, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.3083, 4.1603, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.2016, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.4526, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.0134, 8.9285, 9.0453, 9.1615, 9.0773, 8.9940, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.2697, 9.3810, 9.4916, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.3537, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.9184, 9.8433, 9.7688, 9.8736, 9.9778,\n 10.0814, 10.0074, 9.9340, 10.0371, 10.1398, 10.0668, 9.9944, 9.9224,\n 9.8510, 9.7800, 9.8821, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.3148, 10.2447, 10.3439, 10.4427, 10.5410, 10.4713, 10.4021, 10.5000,\n 10.5974, 10.5286, 10.4603, 10.3923, 10.3248, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: One thing was worrying me dreadfully, but my heart gave a great throb of relief when I saw my ulster lying carelessly over the back of a chair.\nHypothesis: I was dreadfully worried about many things. \nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.1232, 6.9488, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.7419, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.9601, 9.8634, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.1621, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.0562, 10.9697,\n 11.0756, 10.9898, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.3616, 11.2789, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 11.8393, 11.9377,\n 12.0355, 12.1329, 12.0532, 11.9741, 11.8956, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.9534, 12.0493, 12.1447, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Do you think Mrs. Inglethorp made a will leaving all her money to Miss Howard? I asked in a low voice, with some curiosity. \nHypothesis: I yelled at the top of my lungs.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.0605, -0.8889, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -1.8598, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "61.1%", + "z-score": "10.8", + "p value": "2.46e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.1455, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.1756, 10.0692, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.0000, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.3110, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.2763, 10.3898, 10.2975, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.6265, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.7987, 10.9048, 11.0102, 11.1151, 11.0309, 10.9473, 10.8644,\n 10.7822, 10.7006, 10.6196, 10.7242, 10.8282, 10.7480, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.7671])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Isn't a woman's body her most personal property?\nHypothesis: Women's bodies belong to themselves, they should decide what to do with it. \nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "15.0%", + "z-score": "-3.17", + "p value": "0.999", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.2632, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.0247, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.6340, -1.6823, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.7325, -2.5769, -2.4225, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.2478,\n -2.2871, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -3.1665])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.5354, 6.3254, 6.5320,\n 6.7338, 6.5350, 6.3434, 6.1584, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.3560, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 9.1343, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.0779, 10.1948, 10.0984, 10.0029, 9.9085,\n 10.0249, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 12.0499,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: that your approach is is is right you can actually go out and sub it if even if you don't wanna get hands on you can even just sub it out the concrete and those kind of things and and that's kind of the plan i have so um uh everyone i talk to uh i've\nHypothesis: You can sub it even if you do not want to get your hands on it.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.3475, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, 0.0521, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.1803, -0.0449, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, -0.0838, -0.1253, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "65.2%", + "z-score": "13", + "p value": "3.27e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 8.7758, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 9.9601, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.2514, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.8007, 12.7248, 12.6494, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: What changed?\nHypothesis: What was unique?\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "46.7%", + "z-score": "7.08", + "p value": "7.19e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.1160, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.6713, 2.5690, 2.7775,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.2733, 3.4641,\n 3.6522, 3.8376, 4.0205, 3.9208, 3.8228, 3.7264, 3.9056, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 3.9624, 3.8730,\n 4.0415, 3.9530, 3.8657, 3.7796, 3.9452, 4.1090, 4.2710, 4.4313,\n 4.3451, 4.2601, 4.1761, 4.0931, 4.0112, 3.9302, 4.0872, 4.0069,\n 3.9276, 4.0825, 4.2359, 4.3879, 4.5384, 4.6876, 4.6079, 4.5291,\n 4.6765, 4.8226, 4.9675, 4.8889, 5.0323, 5.1745, 5.0964, 5.0190,\n 4.9424, 5.0829, 5.2223, 5.3606, 5.2842, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.3316, 5.4661, 5.3921, 5.5255, 5.4521, 5.3793, 5.5114,\n 5.4391, 5.3675, 5.2965, 5.4272, 5.5570, 5.6858, 5.8138, 5.7429,\n 5.6725, 5.6028, 5.5336, 5.4650, 5.3970, 5.5233, 5.4557, 5.3886,\n 5.5138, 5.6383, 5.7619, 5.8848, 6.0069, 5.9397, 5.8730, 5.9941,\n 6.1146, 6.2342, 6.1677, 6.2866, 6.4048, 6.3385, 6.2728, 6.2075,\n 6.3247, 6.4413, 6.5571, 6.4920, 6.6072, 6.7217, 6.6568, 6.5924,\n 6.5285, 6.6421, 6.5785, 6.6914, 6.6282, 6.5653, 6.6775, 6.6150,\n 6.5528, 6.4911, 6.6024, 6.7132, 6.8233, 6.9330, 6.8713, 6.8101,\n 6.7492, 6.6887, 6.6285, 6.5688, 6.6774, 6.6179, 6.5588, 6.6667,\n 6.7740, 6.8809, 6.9873, 7.0932, 7.0340, 6.9752, 7.0804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 9.0863, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.8792, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.9601, 10.0779, 9.9813, 9.8858, 10.0029, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.4998, 12.4150,\n 12.3309, 12.4289, 12.3455, 12.4430, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.4223, 12.5179, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: oh i don't know either the other growing up all i knew was\nHypothesis: I know because I learned it growing up\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.5345, 0.7924, 0.6963, 0.9467, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.7765, 1.9612, 1.8838, 2.0656,\n 2.2453, 2.4228, 2.3448, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.3126, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 2.1213,\n 2.2862, 2.4495, 2.6112, 2.5403, 2.4703, 2.6296, 2.7875, 2.7175,\n 2.6481, 2.8039, 2.7349, 2.8889, 3.0415, 2.9726, 2.9044, 2.8368,\n 2.7699, 2.9200, 3.0688, 3.0019, 3.1492, 3.2953, 3.4402, 3.3731,\n 3.3066, 3.2408, 3.1755, 3.3182, 3.2533, 3.1889, 3.3301, 3.2660,\n 3.2025, 3.3420, 3.2788, 3.4171, 3.5544, 3.4913, 3.4286, 3.5645,\n 3.6995, 3.8335, 3.9666, 3.9036, 3.8411, 3.9729, 4.1038, 4.0415,\n 4.1713, 4.1092, 4.2381, 4.3661, 4.3042, 4.2426, 4.1816, 4.3083,\n 4.4342, 4.3733, 4.4983, 4.6225, 4.7460, 4.6850, 4.6245, 4.5644,\n 4.5047, 4.6268, 4.5674, 4.5083, 4.4497, 4.3915, 4.3336, 4.4544,\n 4.5744, 4.5166, 4.4593, 4.4023, 4.3456, 4.2893, 4.2334, 4.3519,\n 4.4698, 4.4140, 4.5311, 4.6476, 4.5918, 4.5364, 4.4813, 4.5968,\n 4.5419, 4.4873, 4.6020, 4.7161, 4.8295, 4.7749, 4.7206, 4.6667,\n 4.6130, 4.7255, 4.6720, 4.6188, 4.5659, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 7.0200, 6.7132, 6.9293, 6.6469, 6.8620, 6.5997,\n 6.8127, 6.5672, 6.3333, 6.1101, 6.3255, 6.1137, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.8628, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 10.9560, 11.0782,\n 10.9546, 11.0762, 10.9546, 11.0755, 10.9559, 10.8379, 10.7215, 10.8423,\n 10.7277, 10.6145, 10.5027, 10.6232, 10.5131, 10.4042, 10.5243, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.6667, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 13.8615,\n 13.9561, 13.8642, 13.9585, 13.8675, 13.9615, 13.8713, 13.7818, 13.6931,\n 13.7870, 13.6990, 13.6117, 13.5250, 13.6188, 13.5329, 13.4477, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 14.1227, 14.0414, 14.1309,\n 14.2200, 14.1393, 14.2282, 14.3166, 14.4046, 14.3248, 14.4126, 14.5000,\n 14.4208, 14.3422, 14.4294, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: WHOLE LIFE POLICIES - Policies that provide insurance over the insured's entire life and the proceeds (face amount) are paid only upon death of the insured.\nHypothesis: Whole life policies are a type of life insurance that only cover the insured person until retirement from the workforce.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.5403, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 6.9488, 6.7795, 6.6150, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.0667, 7.9472, 8.0928, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 11.9457, 11.8491, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.7017,\n 12.8017, 12.7100, 12.6190, 12.5289, 12.6287, 12.7279, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 12.9527, 12.8661, 12.9628,\n 12.8769, 12.7918, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 12.8285,\n 12.7461, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.2299,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.3463, 13.2668, 13.1878, 13.2796,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.6667,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.9446, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Justice Kennedy does not care what law librarians across the country do with all the Supreme Court Reporters from 1790 through 1998.\nHypothesis: Justice Kennedy doesn't care if the Supreme Court Reporters from 1790 to 1998 are thrown away.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.3320, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.4608, 9.5714, 9.6814, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.5840, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.9939, 10.9220, 10.8505, 10.7795, 10.8770, 10.9740, 10.9034, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The entire city was surrounded by open countryside with a scattering of small villages.\nHypothesis: The whole countryside is scattered with small villages. \nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.6484, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.3805, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.4300, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.0370, 9.1553, 9.0680, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.5638, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.0631, 10.1695, 10.0910, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 11.0450, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.1218, 11.2171, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: substitute my my yeah my kid'll do uh four or five hours this week for me no problem\nHypothesis: I just can't make the time because of my job.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.6859, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.9711, 1.8791, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.8543, 1.7765, 1.9612, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.1918, 2.3658, 2.2902, 2.4618,\n 2.6316, 2.7995, 2.9656, 2.8893, 3.0533, 3.2157, 3.1394, 3.0641,\n 2.9897, 2.9161, 2.8433, 2.7713, 2.9299, 2.8583, 2.7875, 2.9439,\n 2.8735, 3.0282, 2.9582, 3.1111, 3.0415, 2.9726, 2.9044, 2.8368,\n 2.7699, 2.7037, 2.8534, 2.7875, 2.7222, 2.8701, 2.8051, 2.7406,\n 2.6768, 2.8226, 2.9673, 2.9035, 2.8402, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.8753, 3.0151, 3.1539, 3.0923, 3.2299, 3.3665,\n 3.5022, 3.4403, 3.5748, 3.7084, 3.8411, 3.9729, 3.9107, 3.8490,\n 3.9795, 3.9181, 3.8571, 3.7966, 3.7366, 3.6770, 3.6178, 3.7463,\n 3.6874, 3.6289, 3.7563, 3.6980, 3.8244, 3.7664, 3.8919, 3.8341,\n 3.7766, 3.9010, 3.8438, 3.7870, 3.7306, 3.8538, 3.7975, 3.7417,\n 3.8638, 3.8081, 3.7528, 3.8740, 3.8189, 3.7641, 3.7097, 3.8297,\n 3.7755, 3.7216, 3.6680, 3.6148, 3.7335, 3.8516, 3.7984, 3.7455,\n 3.8627, 3.8100, 3.7576, 3.7055, 3.6537, 3.7697, 3.8851, 4.0000,\n 3.9481, 4.0622, 4.0105, 3.9590, 3.9078, 4.0210, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.3138, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.9752, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.3686, 9.4812, 9.3993, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.4124, 9.5224, 9.6317, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.0701, 9.9940, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.6722, 10.7714, 10.6990, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: it's actually there well Iraq has had uh designs on that place since nineteen twenty two so you know it wasn't like something that just suddenly popped up\nHypothesis: The weird thing is that Iraq was never interested in that place until now.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.4747, -0.5164, -0.5579, -0.5991, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.1016, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.5191, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.3571, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 11.8571, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.2034, 12.1184, 12.2178, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.5401, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.7876, 12.8817,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.1746, 13.2668, 13.1878, 13.1094,\n 13.2012, 13.1233, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The final rule contains a Federalism Assessment under Executive Order\nHypothesis: The final rule had a federalism assessment that was added through executive order by the President.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -1.9702, -1.6330,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.0847, -2.1309, -2.1768, -2.0000, -2.0461, -2.0918, -2.1372, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.3238, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.2197, -2.2608, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.3754, -3.4091, -3.2705, -3.3044, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.7678, 9.6732, 9.7912, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.5290, 11.4450, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.7031, 11.8028, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.5495, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.7688,\n 12.8616, 12.9540, 13.0460, 12.9691, 12.8928, 12.9845, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.1966, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Prototyping, for example, may act as part of the requirements definition process, helping the agency identify and control areas of high uncertainty and technical risk.\nHypothesis: Prototyping is not important, testing with the actual finished product is better.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.7223, 0.8980, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.0483, 5.3333, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 4.9008, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.5966, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 11.8503, 11.9504, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.5979, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.5131, 13.6025, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: The regime's response of ferocious repression plus numerous other ineptitudes led to a third revolution in 1848, with the Bonapartists, led by Napoleon's nephew, emerging triumphant.\nHypothesis: France was ruled by Napoleon's nephew after they won a revolution in 1848.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.5403, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "167", + "Fraction of T in Greenlist": "83.9%", + "z-score": "19.2", + "p value": "2.04e-82", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.4000, 9.5366, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.8321,\n 10.7098, 10.8327, 10.9546, 11.0755, 11.1954, 11.3143, 11.4323, 11.5494,\n 11.6656, 11.5476, 11.6632, 11.7779, 11.8918, 12.0049, 12.1171, 12.0021,\n 11.8885, 12.0005, 11.8885, 12.0000, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.7597, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.2768, 13.3789, 13.4804, 13.3747, 13.4758, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.4562,\n 14.5513, 14.6459, 14.7400, 14.8337, 14.9269, 15.0195, 15.1118, 15.2036,\n 15.2949, 15.3858, 15.4762, 15.5662, 15.6558, 15.7449, 15.8336, 15.9220,\n 16.0099, 16.0974, 16.1845, 16.2712, 16.3575, 16.4435, 16.5291, 16.6143,\n 16.6991, 16.7835, 16.8676, 16.9514, 17.0348, 17.1178, 17.2005, 17.2829,\n 17.3649, 17.4466, 17.5280, 17.6090, 17.6897, 17.7701, 17.8502, 17.9300,\n 18.0095, 18.0886, 18.1675, 18.2461, 18.3243, 18.4023, 18.4800, 18.5574,\n 18.6345, 18.7114, 18.7879, 18.8642, 18.7722, 18.8484, 18.9243, 19.0000,\n 19.0754, 18.9847, 19.0600, 19.1350, 19.2098, 19.2843, 19.1949])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Boats in daily use lie within feet of the fashionable bars and restaurants.\nHypothesis: Bars and restaurants are interesting places.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.0451, 3.9158, 3.7897, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.5569, 4.7488, 4.9377,\n 4.8177, 4.7002, 4.8857, 4.7703, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.5115, 4.4061, 4.3026, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.3333, 4.5034, 4.6715, 4.8375, 5.0017, 4.9058,\n 4.8113, 4.7181, 4.6262, 4.5356, 4.4462, 4.6070, 4.5186, 4.4313,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.9691, 5.1212, 5.0350, 4.9497,\n 4.8655, 4.7823, 4.7001, 4.8497, 4.7682, 4.6876, 4.8355, 4.7556,\n 4.6765, 4.5983, 4.5210, 4.4444, 4.3687, 4.5140, 4.4388, 4.5826,\n 4.7252, 4.6503, 4.7916, 4.9317, 5.0707, 5.2086, 5.1338, 5.0596,\n 4.9862, 5.1225, 5.0496, 5.1848, 5.3189, 5.4521, 5.5842, 5.7155,\n 5.8458, 5.9752, 5.9019, 5.8292, 5.7572, 5.6858, 5.6150, 5.5448,\n 5.4752, 5.4062, 5.5336, 5.4650, 5.3970, 5.3295, 5.2626, 5.1962,\n 5.1303, 5.0649, 5.1905, 5.3153, 5.2501, 5.1854, 5.1213, 5.0576,\n 4.9943, 4.9316, 4.8693, 4.8074, 4.9303, 4.8687, 4.8076, 4.7469,\n 4.8687, 4.8083, 4.9292, 5.0494, 4.9891, 4.9292, 4.8698, 4.8107,\n 4.7520, 4.8709, 4.9891, 4.9305, 5.0479, 5.1647, 5.2809, 5.2223,\n 5.1642, 5.2795, 5.2215, 5.3361, 5.4501, 5.3923, 5.3349, 5.4480,\n 5.3909, 5.5033, 5.4464, 5.3898, 5.5015, 5.4451, 5.5562, 5.6667,\n 5.6104, 5.5545, 5.6643, 5.6085, 5.5532, 5.4981, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 7.6667,\n 7.5056, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.4000, 9.5366, 9.6719, 9.5443, 9.6786, 9.8116,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.7065, 9.8367, 9.7181, 9.8473,\n 9.9754, 10.1024, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.5601, 11.4531, 11.5655,\n 11.4599, 11.5718, 11.4675, 11.5788, 11.4759, 11.3740, 11.4849, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.5157, 11.6242, 11.7320, 11.8392,\n 11.9457, 11.8491, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.5930, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.7100, 12.6190, 12.7187, 12.6287, 12.5394, 12.6387, 12.5503,\n 12.4625, 12.5615, 12.6601, 12.7581, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.2593, 13.3537, 13.2690, 13.1849,\n 13.1014, 13.1957, 13.2895, 13.2068, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.4963, 13.5881, 13.5069, 13.5985, 13.6896, 13.7803, 13.8707, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.2455, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.4382, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: so i guess my experience is is just with what we did and and so they didn't really go through the child care route they were able to be home together\nHypothesis: They were able to be home rather than having to worry about getting child care.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.0123, 2.8943, 3.1160, 3.0000,\n 2.8868, 3.1027, 2.9913, 2.8823, 2.7757, 2.6713, 2.5690, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.9814, 2.8830, 2.7863, 2.9823, 2.8868,\n 2.7928, 2.9848, 3.1741, 3.0806, 2.9887, 3.1743, 3.0833, 2.9938,\n 3.1760, 3.3558, 3.2667, 3.1789, 3.3556, 3.2686, 3.1829, 3.3566,\n 3.5283, 3.4429, 3.3587, 3.5277, 3.4442, 3.3619, 3.5282, 3.6927,\n 3.6107, 3.5298, 3.6919, 3.6116, 3.5322, 3.6920, 3.8503, 3.7712,\n 3.6931, 3.8492, 3.7717, 3.6950, 3.8490, 4.0016, 3.9253, 3.8497,\n 4.0004, 3.9254, 3.8512, 4.0000, 4.1475, 4.0736, 4.0004, 4.1461,\n 4.0734, 4.0015, 4.1455, 4.2885, 4.2167, 4.1457, 4.2870, 4.2164,\n 4.1464, 4.2862, 4.4249, 4.3552, 4.2861, 4.4234, 4.3547, 4.2866,\n 4.4225, 4.5573, 4.4895, 4.4222, 4.5557, 4.4888, 4.4224, 4.5547,\n 4.6860, 4.6198, 4.5542, 4.6843, 4.6191, 4.5543, 4.6832, 4.8113,\n 4.7467, 4.6826, 4.8095, 4.7458, 4.6825, 4.8083, 4.9333, 4.8702,\n 4.8076, 4.9316, 4.8693, 4.8074, 4.9303, 5.0525, 4.9908, 4.9295,\n 5.0507, 4.9897, 4.9292, 5.0494, 5.1689, 5.1085, 5.0485, 5.1671,\n 5.1073, 5.0480, 5.1657, 5.2827, 5.2235, 5.1647, 5.2809, 5.2223,\n 5.1642, 5.2795, 5.3941, 5.3361, 5.2784, 5.3923, 5.3349, 5.2778,\n 5.3909, 5.5033, 5.4464, 5.3898, 5.5015, 5.4451, 5.3891, 5.5000,\n 5.6104, 5.5545, 5.4989, 5.6085, 5.5532, 5.4981, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.2222, 7.4194, 7.1832, 7.3786, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.5144, 7.3131, 7.4983, 7.6800, 7.4885, 7.6681,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.3283, 8.1654, 8.3267, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.6817, 8.8271, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.3901, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.6813, 9.5743, 9.6995, 9.5939, 9.4896, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.2316, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.6439, 12.7378, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.6918, 12.7847, 12.8771, 12.8007, 12.8928, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: DOD's common practice for managing this environment has been to create aggressive risk reduction efforts in its programs.\nHypothesis: Creating risk reduction efforts is common practice.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.7143, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.1881, 8.0667, 7.9472, 8.0928, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.7210,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.5264, 12.4430, 12.5401, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.6785, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.0030, 13.0956, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.0608, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.4510, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Then he ran.\nHypothesis: He ran like an athlete.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "81", + "Fraction of T in Greenlist": "40.7%", + "z-score": "5.12", + "p value": "1.56e-07", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.5627, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.1412, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.6833, 2.8830, 2.7863, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 2.1678, 2.0913, 2.0158, 2.1918, 2.1167, 2.0426, 1.9695,\n 2.1420, 2.3126, 2.2393, 2.4077, 2.3349, 2.5011, 2.6656, 2.8284,\n 2.7552, 2.6828, 2.6112, 2.7713, 2.9299, 3.0870, 3.2426, 3.3968,\n 3.3243, 3.4768, 3.6279, 3.7778, 3.9263, 3.8534, 3.7812, 3.7097,\n 3.6389, 3.5689, 3.4995, 3.4308, 3.3627, 3.2953, 3.2285, 3.1623,\n 3.3066, 3.2408, 3.3838, 3.5256, 3.6664, 3.6004, 3.7399, 3.8784,\n 3.8125, 3.9497, 3.8841, 3.8191, 3.7547, 3.8903, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 4.0988, 4.0356, 3.9729, 3.9107, 4.0415,\n 3.9795, 4.1092, 4.0476, 4.1763, 4.1150, 4.0541, 4.1816, 4.1210,\n 4.0608, 4.0011, 4.1273, 4.2527, 4.1931, 4.3176, 4.2582, 4.3818,\n 4.5047, 4.6268, 4.5674, 4.5083, 4.4497, 4.5707, 4.6911, 4.8107,\n 4.9297, 5.0480, 4.9891, 5.1066, 5.2235, 5.3398, 5.2809, 5.2223,\n 5.1642, 5.1064, 5.0489, 4.9918, 4.9351, 4.8787, 4.9934, 4.9373,\n 4.8815, 4.9953, 5.1086, 5.2213, 5.1655, 5.1100, 5.0548, 5.1667,\n 5.1117, 5.0571, 5.0027, 5.1137, 5.2241, 5.1698, 5.1159])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.4", + "p value": "5.07e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.6338, 10.7444, 10.6534, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.5471, 11.6487, 11.7498, 11.6666, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.4223, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.2927, 13.2149, 13.1376, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.2717, 13.3615])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: you sound like this girl that i talked to about books and we got into movies one night\nHypothesis: I found out about so many movies I had never heard of.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.3373, 1.4857, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.3957, 1.3448, 1.4792, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 1.1651, 1.2956, 1.2472,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.1587, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.2700, 1.2244, 1.1790, 1.1339, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.2776, 6.4413, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.7555, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.0000, 7.9079, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.1481, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 8.9113,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.2055, 9.1250, 9.0452,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.4124, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.4619, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.7380, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.9038, 10.0074, 9.9340, 9.8611, 9.9642, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.0547, 10.1558, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.3148, 10.2447, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.4312, 10.5286, 10.4603, 10.5573, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: if the United States had used full conventional power.\nHypothesis: The United States has no power to use.\nRelation:", + "true_label": 2, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.8245, 0.7789, 0.9062, 0.8607, 0.9870, 0.9415, 1.0670, 1.0215,\n 0.9763, 0.9313, 1.0555, 1.0106, 0.9659, 0.9215, 1.0444, 1.0000,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "188", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "58.0%", + "z-score": "10.4", + "p value": "7.91e-26", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.4146, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 7.9079, 7.8168, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.0598, 8.1850, 8.3093, 8.2219,\n 8.1354, 8.2588, 8.1731, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.5607, 8.6783, 8.7952, 8.9113,\n 9.0267, 8.9448, 8.8636, 8.7831, 8.7033, 8.8179, 8.7388, 8.8527,\n 8.9660, 8.8874, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.2450, 9.3537, 9.4619, 9.5695, 9.6764,\n 9.6008, 9.5258, 9.6322, 9.5577, 9.6635, 9.7688, 9.8736, 9.9778,\n 9.9038, 10.0074, 9.9340, 9.8611, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.0547, 10.1558, 10.0848, 10.0143, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: yeah really no kidding\nHypothesis: Really? No kidding! \nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "193", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.8%", + "z-score": "-1.04", + "p value": "0.851", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.7303,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.3740, 11.2732, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.4065, 11.5157, 11.6242, 11.7320, 11.8392,\n 11.9457, 12.0516, 12.1568, 12.0605, 11.9650, 12.0699, 11.9754, 12.0798,\n 11.9863, 12.0902, 11.9977, 11.9060, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.7279, 12.8267, 12.7376,\n 12.8359, 12.7476, 12.6601, 12.7581, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 12.9840, 12.8997, 12.9952, 12.9116, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.4263, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.7215, 13.6429, 13.7327, 13.6546, 13.5771, 13.6667,\n 13.5897, 13.5131, 13.6025, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Visit at sundown or out of season to get the full flavor of the setting.\nHypothesis: The setting is better to visit at sundown or during low season.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.8295, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.6222, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.6983, 0.6460, 0.7921,\n 0.9372, 1.0812, 1.0284, 0.9759, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.9017, 0.8540, 0.9858, 1.1169, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.0565, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.2049, 1.1587, 1.2839, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 1.0336, 1.1547, 1.1106, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "163", + "Fraction of T in Greenlist": "81.9%", + "z-score": "18.5", + "p value": "4.91e-77", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.3509, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.7039, 8.8648, 9.0233, 8.8426, 8.6667,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.6600, 10.7918, 10.9222, 10.7732, 10.6270, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.6189,\n 11.4829, 11.6039, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.4072, 12.2794, 12.3928, 12.5053, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.7924, 14.8873, 14.9817, 15.0756, 15.1690, 15.2619, 15.1556, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.7680, 15.6667, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.3700, 16.4561, 16.3575, 16.2598, 16.3459, 16.4317,\n 16.5171, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 16.9265,\n 16.8320, 16.9152, 16.9982, 17.0807, 17.1630, 17.2449, 17.3265, 17.4078,\n 17.4887, 17.5693, 17.4770, 17.3854, 17.4660, 17.5464, 17.6264, 17.7061,\n 17.7856, 17.8647, 17.9435, 18.0221, 18.1003, 18.0107, 17.9217, 18.0000,\n 18.0780, 18.1557, 18.2331, 18.3103, 18.3871, 18.4637, 18.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: In a further role reversal, Gingrich may have positioned himself to fill it.\nHypothesis: Gingrich should not be in power.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.2813, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.7187, -2.7541, -2.6178, -2.6534, -2.6888, -2.7240,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.7701, -2.8043, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.2202, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.7987, 10.7143, 10.6306, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.4460, 12.5394, 12.6323, 12.7248, 12.6494, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: All of the islands are now officially and proudly part of France, not colonies as they were for some three centuries.\nHypothesis: The islands are part of France now instead of just colonies.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.5143, -0.3586, -0.2041,\n -0.2542, -0.3038, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.6%", + "z-score": "12.2", + "p value": "1.11e-34", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.6470, 9.5368, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.8792, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.0577, 9.9601, 10.0779, 10.1948, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 10.9291, 10.8363, 10.9462, 10.8544, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.2857, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.5471, 11.4638, 11.3812, 11.2992, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.2602, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.5868, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.9586, 12.0529, 12.1468, 12.0731, 12.0000,\n 12.0935, 12.0209, 12.1141, 12.2068, 12.2992, 12.2271])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: (For more information on BLM's senior executive performance plans, see app.\nHypothesis: BLM's performance plans are visible online.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.8297, 3.7009, 3.5753, 3.7897, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 2.9823, 3.1754,\n 3.3657, 3.2691, 3.1741, 3.3607, 3.2667, 3.1743, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.4438, 3.6187, 3.7916, 3.7025, 3.8730,\n 4.0415, 4.2080, 4.3727, 4.5356, 4.6967, 4.6070, 4.5186, 4.4313,\n 4.5899, 4.7469, 4.9023, 5.0562, 4.9691, 4.8830, 5.0350, 5.1855,\n 5.3345, 5.4822, 5.6285, 5.5426, 5.4576, 5.3736, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.1908, 5.1111, 5.2535, 5.1745, 5.0964, 5.2372,\n 5.3769, 5.2992, 5.2223, 5.1461, 5.2842, 5.2086, 5.1338, 5.2705,\n 5.1962, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.0325, 4.9624, 4.8930, 5.0252, 4.9562, 4.8878, 4.8200, 4.7527,\n 4.8833, 5.0130, 4.9460, 4.8795, 5.0080, 4.9419, 4.8763, 4.8113,\n 4.9385, 4.8737, 4.8095, 4.9356, 5.0609, 5.1854, 5.3092, 5.2449,\n 5.3677, 5.4899, 5.6112, 5.7319, 5.8519, 5.9711, 5.9065, 5.8424,\n 5.7787, 5.8969, 6.0145, 6.1314, 6.2476, 6.1839, 6.1207, 6.2361,\n 6.3509, 6.4650, 6.5785, 6.6914, 6.6282, 6.5653, 6.5029, 6.4409,\n 6.5528, 6.4911, 6.4298, 6.3689, 6.3084, 6.4194, 6.3592, 6.2993,\n 6.4096, 6.5193, 6.4597, 6.4004, 6.3414, 6.4504, 6.3917, 6.3333,\n 6.2753, 6.2177, 6.3258, 6.2684, 6.2113, 6.1546, 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.1355, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.5258, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.3976, 8.3093, 8.2219,\n 8.1354, 8.2588, 8.1731, 8.0882, 8.0042, 8.1266, 8.0434, 8.1650,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.4608, 9.3810, 9.3017, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.9648, 10.0701, 9.9940, 10.0987, 10.2029, 10.1273, 10.0523, 9.9778,\n 10.0814, 10.0074, 9.9340, 9.8611, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.1218, 11.2171, 11.3120, 11.2424, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: While parents may pick up this gay semaphore, kids aren't likely to.\nHypothesis: Some kids do understand gay signals.\nRelation:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.7%", + "z-score": "-3.33", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.3102, -2.3570,\n -2.4035, -2.4495, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.4885, -2.5322, -2.5756, -2.6186,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.6575, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.7325, -2.7714, -2.8101, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -2.8852, -2.9227, -2.9600, -2.9971,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -3.0657, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.3890, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.5543, 9.4606, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.3326, 9.4501, 9.3611, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.5393, 10.6439, 10.7480, 10.6683, 10.7719,\n 10.8749, 10.7959, 10.8984, 11.0004, 10.9220, 11.0235, 10.9458, 10.8686,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer with 'entailment', 'contradiction', or 'neutral':\nPremise: Taking an ecumenical tack, nation officials in Chicago recently issued edicts commanding preachers to back off their anti-Semitic rhetoric.\nHypothesis: Nation officials in Chicago are involved in religious issues.\nRelation:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "39.7%", + "z-score": "2.91", + "p value": "0.00183", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.3706, 2.2743, 2.4804, 2.6833, 2.8830, 2.7863, 2.6914, 2.5981,\n 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.9057])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.1%", + "z-score": "11.1", + "p value": "8e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 3.9279, 3.7524, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 5.8635,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.0711,\n 6.9759, 7.1152, 7.2532, 7.3901, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.6064, 7.7387, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.7610, 8.8778, 8.9940, 8.9113,\n 8.8294, 8.9448, 8.8636, 8.9783, 8.8978, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.3212, 9.4299, 9.5381, 9.6456, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.0523, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.2132, 10.3154, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.6990, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 2, + "predicted_label_with_watermark": 2 + } + ], + "metrics": { + "accuracy_without_watermark": 0.37, + "accuracy_with_watermark": 0.35, + "f1_without_watermark": 0.26940892298948566, + "f1_with_watermark": 0.25000682333033103 + } + } + }, + "qnli": { + "train": { + "results": [ + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did the third Digimon series begin?\nContext: Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.3586, -0.2041,\n -0.0508, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.3146, 0.2689, 0.4021, 0.5345,\n 0.6662, 0.7971, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.8704,\n 0.9981, 0.9520, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.9861, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.2202, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 9.8236, 9.9373, 10.0504, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.2592, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 10.8749, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which missile batteries often have individual launchers several kilometres from one another?\nContext: When MANPADS is operated by specialists, batteries may have several dozen teams deploying separately in small sections; self-propelled air defence guns may deploy in pairs.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "120", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "30.8%", + "z-score": "1.48", + "p value": "0.07", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.5068, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.8791, 1.7889, 1.9959, 2.1997, 2.1094, 2.0207,\n 1.9335, 2.1320, 2.0455, 1.9604, 1.8766, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.4228, 2.3448, 2.2678, 2.1918, 2.1167, 2.0426, 1.9695,\n 1.8972, 2.0692, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.6186, 1.7778, 1.7143, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.8780, 8.0546, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 7.6594, 7.4878, 7.6613, 7.4952, 7.3333,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.2168, 7.0711, 7.2400, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.5645, 11.6709, 11.5779, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.3728, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.4046, 14.4923, 14.5797, 14.5000,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.6027, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What two things does Popper argue Tarski's theory involves in an evaluation of truth?\nContext: He bases this interpretation on the fact that examples such as the one described above refer to two things: assertions and the facts to which they refer.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, -0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 5.9604, 5.6804, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.9969, 9.8387, 9.6838, 9.5321, 9.3834, 9.5263,\n 9.3811, 9.5230, 9.3811, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.3288, 10.4581, 10.3280,\n 10.1999, 10.3287, 10.4565, 10.5830, 10.7084, 10.8328, 10.9560, 11.0782,\n 10.9546, 10.8327, 10.7125, 10.8347, 10.9559, 11.0761, 10.9585, 10.8423,\n 10.9621, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 11.7803, 11.8915, 12.0020,\n 11.8944, 12.0044, 12.1136, 12.0077, 11.9029, 11.7992, 11.6966, 11.8058,\n 11.9144, 11.8132, 11.7130, 11.6139, 11.7222, 11.6242, 11.5271, 11.6351,\n 11.7424, 11.6465, 11.7533, 11.8594, 11.9650, 11.8704, 11.7766, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.2503, 12.1622, 12.0749, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.3027, 12.2178, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.1646, 12.2627, 12.1805, 12.2782, 12.1967, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.4109, 12.3342, 12.4283,\n 12.3523, 12.4460, 12.3705, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.7928, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the name of the village 9 miles north of Calafat where the Ottoman forces attacked the Russians?\nContext: On 31 December 1853, the Ottoman forces at Calafat moved against the Russian force at Chetatea or Cetate, a small village nine miles north of Calafat, and engaged them on 6 January 1854.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.2%", + "z-score": "-2.87", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -2.8402,\n -2.8868, -2.9329, -2.9785, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -3.1493, -3.1914, -3.2332, -3.2746, -3.3156, -3.3564, -3.1704,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.4130, -3.2332, -3.0551,\n -3.0958, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.0714, -3.1109, -3.1500, -3.1889, -3.2276, -3.2660,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.3717,\n -3.2206, -3.2567, -3.1071, -3.1433, -3.1794, -3.2152, -3.2509, -3.1038,\n -3.1396, -3.1753, -3.0298, -3.0657, -3.1013, -2.9575, -2.9933, -3.0290,\n -3.0644, -2.9225, -2.7815, -2.8174, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -2.9692, -2.8383, -2.8721])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094, 2.6605, 2.9938,\n 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998, 3.5796, 3.8497, 4.1111,\n 4.3644, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.7735,\n 5.6000, 5.4322, 5.6395, 5.8424, 5.6805, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.6183, 5.4740, 5.3333, 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140,\n 5.6830, 5.5549, 5.7354, 5.6099, 5.4870, 5.3666, 5.2485, 5.4259, 5.6009,\n 5.4848, 5.3709, 5.5432, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 5.9874, 6.1450, 6.0410, 6.1968, 6.3509,\n 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.4008, 6.5483, 6.6944,\n 6.8391, 6.9824, 7.1243, 7.0268, 6.9305, 6.8354, 6.9759, 6.8819, 7.0211,\n 6.9282, 6.8364, 6.9743, 6.8834, 7.0201, 7.1556, 7.0657, 6.9768, 6.8889,\n 7.0231, 6.9361, 7.0692, 6.9830, 7.1149, 7.2459, 7.3758, 7.5048, 7.6328,\n 7.5472, 7.4625, 7.3786, 7.5056, 7.4225, 7.5484, 7.4661, 7.3845, 7.5094,\n 7.4286, 7.5526, 7.6758, 7.5955, 7.5161, 7.6383, 7.5595, 7.6808, 7.6026,\n 7.5251, 7.4483, 7.3721, 7.4924, 7.6120, 7.7308, 7.8489, 7.7732, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.8988, 8.0139, 7.9403, 7.8673, 7.9816,\n 8.0952, 8.0227, 7.9507, 8.0636, 7.9921, 7.9211, 8.0333, 7.9628, 8.0742,\n 8.1851, 8.2954, 8.4050, 8.5141, 8.4439, 8.5524, 8.4826, 8.4133, 8.5212,\n 8.6284, 8.7351, 8.8413, 8.9469, 9.0520, 9.1566, 9.2607, 9.1916, 9.2952,\n 9.3982, 9.5007, 9.6028, 9.5341, 9.4658, 9.5673, 9.6684, 9.6005, 9.5331,\n 9.6336, 9.5666, 9.6667, 9.6000, 9.5338, 9.4680, 9.4026, 9.5021, 9.6011,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What famous palace is located in London?\nContext: London contains four World Heritage Sites: the Tower of London; Kew Gardens; the site comprising the Palace of Westminster, Westminster Abbey, and St Margaret's Church; and the historic settlement of Greenwich (in which the Royal Observatory, Greenwich marks the Prime Meridian, 0\u00b0 longitude, and GMT).\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "176", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "28.4%", + "z-score": "1.04", + "p value": "0.148", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.6678, 1.5785, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.4403, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.3943, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 1.0598, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 1.1169, 1.2472,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.0445])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.3984, 8.5337, 8.4270, 8.3217, 8.2178, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.5543, 9.4606, 9.5795, 9.4868,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.7725, 9.8877, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.8426, 10.7559, 10.6700, 10.7772,\n 10.6920, 10.6076, 10.5238, 10.6306, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.9906, 10.9091, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.7200, 11.8176, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 12.0493, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When is the term 'German dialects' used in regard to the German language?\nContext: When talking about the German language, the term German dialects is only used for the traditional regional varieties.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.4142, 1.6348, 1.5430,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 1.1946, 1.3587,\n 1.5213, 1.4580, 1.6186, 1.5556, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.5430, 1.4857, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 1.0879, 1.0371, 0.9867, 1.1239,\n 1.2603, 1.3957, 1.5303, 1.4792, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.7028, 1.6530, 1.6036,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.5363, 1.4881, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.4551, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.5621, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.83", + "p value": "4.13e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094, 2.6605, 2.9938,\n 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998, 3.1177, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.1623, 3.4219, 3.2660, 3.5176, 3.3665, 3.2205, 3.4641,\n 3.7017, 3.9337, 4.1603, 4.3818, 4.5985, 4.4544, 4.6664, 4.5260, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997,\n 5.3716, 5.5549, 5.4295, 5.3067, 5.1864, 5.0684, 4.9528, 5.1326, 5.3100,\n 5.4848, 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.7242, 5.8835, 5.7812, 5.9386, 6.0943,\n 5.9932, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469, 6.7931, 6.9378,\n 6.8391, 6.7416, 6.8849, 6.7886, 6.6935, 6.8354, 6.7414, 6.6486, 6.7890,\n 6.6973, 6.6066, 6.5169, 6.6559, 6.7937, 6.9303, 7.0657, 7.2001, 7.1111,\n 7.2443, 7.1563, 7.0692, 7.2012, 7.1149, 7.0296, 6.9451, 7.0759, 7.2058,\n 7.3346, 7.2508, 7.1678, 7.2956, 7.4225, 7.5484, 7.4661, 7.3845, 7.5094,\n 7.6335, 7.7567, 7.8791, 8.0006, 7.9196, 7.8393, 7.7597, 7.8803, 8.0002,\n 7.9212, 8.0402, 7.9619, 7.8842, 8.0024, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.5052, 8.4286, 8.5424, 8.4664, 8.5796, 8.6921, 8.8039, 8.9151,\n 8.8396, 8.7647, 8.8752, 8.8008, 8.7270, 8.8369, 8.9461, 9.0548, 8.9815,\n 8.9086, 9.0167, 9.1242, 9.0518, 9.1587, 9.2651, 9.3708, 9.2990, 9.2276,\n 9.3328, 9.2619, 9.1915, 9.2961, 9.4002, 9.5038, 9.4338, 9.3642, 9.4673,\n 9.3982, 9.5007, 9.6028, 9.7043, 9.8054, 9.7367, 9.6684, 9.7690, 9.7011,\n 9.6336, 9.7337, 9.8333, 9.9325, 9.8654, 9.7987, 9.8974, 9.8311])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the name of the island the English traded to the Dutch in return for New Amsterdam?\nContext: At the end of the Second Anglo-Dutch War, the English gained New Amsterdam (New York) in North America in exchange for Dutch control of Run, an Indonesian island.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "28.1%", + "z-score": "0.913", + "p value": "0.181", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.6928, 0.6319, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.1152, 8.0139,\n 8.1483, 8.0483, 7.9495, 7.8520, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 9.8197, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.3827, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.1846, 10.2872, 10.2132, 10.3154, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.6722, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.7527, 10.8505, 10.9480, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How were the Portuguese expelled from Myanmar?\nContext: From the 1720s onward, the kingdom was beset with repeated Meithei raids into Upper Myanmar and a nagging rebellion in Lan Na.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -0.7746,\n -0.8340, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.2708, -1.3166, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.8086, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -1.8985, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 9.2469, 9.1130, 8.9815,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.7065, 9.5876, 9.4705, 9.3550,\n 9.2410, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.2740, 10.3908, 10.2923, 10.4083, 10.3110, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.0476, 11.9594, 11.8719, 11.9737, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.5129, 12.4289, 12.3455, 12.2627, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 12.8817,\n 12.8019, 12.7226, 12.6439, 12.7378, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.0910, 13.0157, 12.9410, 13.0316, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What does the word 'customer' properly apply to?\nContext: The bill also required rotation of principal maintenance inspectors and stipulated that the word \"customer\" properly applies to the flying public, not those entities regulated by the FAA.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.8131, 1.0265, 1.2366, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.7942, 1.7130, 1.9052,\n 1.8245, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.6908, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.9238, 1.0659, 1.0139, 1.1547,\n 1.2946, 1.2423, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.0632, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.7725, 9.6828, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.3630, 10.2743, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.2348, 10.1494, 10.2592, 10.3683, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 11.9380, 12.0327, 11.9586, 12.0529, 11.9792, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did Arsenal consider the yellow and blue colors to be after losing a FA Cup final wearing red and white?\nContext: Arsenal then competed in three consecutive FA Cup finals between 1978 and 1980 wearing their \"lucky\" yellow and blue strip, which remained the club's away strip until the release of a green and navy away kit in 1982\u201383.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.608", + "p value": "0.271", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.6083])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.3901, 7.5258, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.3324, 8.2483, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.5725, 10.5001, 10.4281, 10.3566, 10.4563, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who starred in 'True Love'?\nContext: The show starred Ted Danson as Dr. John Becker, a doctor who operated a small practice and was constantly annoyed by his patients, co-workers, friends, and practically everything and everybody else in his world.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.3289, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.7216, -0.5927, -0.6333, -0.6737, -0.5459, -0.5864, -0.6266, -0.5000,\n -0.3740, -0.2487, -0.2894, -0.3299, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.1584, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.1905,\n 10.0855, 9.9817, 9.8792, 9.7778, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.2146, 10.1193,\n 10.0249, 9.9315, 10.0472, 9.9547, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 11.9883,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.0185, 12.9363, 13.0307, 13.1246, 13.2182, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.8904, 13.9797, 13.9007, 13.9897, 13.9113, 14.0000,\n 14.0884, 14.0106, 13.9332, 14.0214, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was elected as the Watch Tower Society's president in January of 1917?\nContext: His election was disputed, and members of the Board of Directors accused him of acting in an autocratic and secretive manner.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, 0.0000, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.2067, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.1881, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.3152, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.7210,\n 8.8518, 8.9815, 8.8780, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.3039, 11.2142, 11.3204, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.6234, 12.7199, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.0307, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.0688, 12.9891, 13.0821, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.3967, 13.3196, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What do most open education sources offer?\nContext: The conventional merit-system degree is currently not as common in open education as it is in campus universities, although some open universities do already offer conventional degrees such as the Open University in the United Kingdom.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.2222, -0.8729, -0.9649, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.9909, 1.1508, 1.3093,\n 1.2492, 1.4059, 1.3460, 1.5010, 1.4412, 1.5945, 1.5348, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.5430, 1.6906, 1.8371,\n 1.9825, 2.1268, 2.0682, 2.0101, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.8411, 1.7864, 1.9245,\n 2.0617, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.9813, 2.1128, 2.2436, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.5247, 2.4721, 2.5990, 2.5466, 2.6726,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.6623, 2.7852,\n 2.9076, 3.0292, 2.9776, 2.9263, 3.0469, 2.9957, 2.9448, 2.8943,\n 2.8440, 2.7940, 2.7443, 2.6949, 2.6458, 2.7644, 2.7154, 2.8333,\n 2.9507, 2.9016, 2.8528, 2.8043, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.7%", + "z-score": "16.2", + "p value": "4.38e-59", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 4.6667, 4.3235, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 7.3054, 7.1187, 6.9378,\n 7.1232, 6.9488, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.2168, 7.0711, 6.9286, 6.7893,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.2967, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.5717, 12.4759,\n 12.5782, 12.4834, 12.5853, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.2464, 14.1582, 14.2499, 14.3412, 14.4321,\n 14.5226, 14.4355, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 14.8878, 14.9755, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.2387, 15.3247, 15.4103, 15.4956, 15.5805, 15.6651, 15.7494, 15.8333,\n 15.7507, 15.8344, 15.9178, 16.0009, 16.0836, 16.1660])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which collection of minor poems are sometimes attributed to Virgil?\nContext: A number of minor poems, collected in the Appendix Vergiliana, are sometimes attributed to him.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "54", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.157", + "p value": "0.562", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "56", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "66.1%", + "z-score": "7.1", + "p value": "6.33e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: While looking for bugs, what else can testing do?\nContext: Although testing can determine the correctness of software under the assumption of some specific hypotheses (see hierarchy of testing difficulty below), testing cannot identify all the defects within software.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -1.8571, -1.7085, -1.7500, -1.7913, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.6827, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.1698, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.3284, 10.2486, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.1702, 11.0937, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.6297, 11.5549, 11.6514, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.7611, 11.6893, 11.7833, 11.7120, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How much hydroelectric power can be generated?\nContext: The state is also the first state in India to achieve the goal of having a bank account for every family.[citation needed]\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700, -0.7303,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -0.9608, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.0219, 6.8718, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.1493, 9.0401, 9.1706, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.4829, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.9637, 10.8729, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.4261, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 11.8172, 11.7326, 11.8336, 11.9341, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.8957, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.1233, 13.0460, 12.9691, 13.0608, 12.9845, 12.9087, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.0316, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What two people were killed inside the store?\nContext: The dead included two men from Northern California who had merely been visiting the store's owner, their cousin, to see if they could open a similar store in their area.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.0658, 0.1307, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.0508, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.0896, 0.0447, 0.0000,\n 0.1332, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.2111, 0.3369, 0.4620, 0.5864, 0.7102, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.8107, 5.0186, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 6.8641, 7.0133, 6.9076, 6.8034, 6.7006, 6.8483, 6.9945, 6.8931,\n 6.7931, 6.9378, 6.8391, 6.7416, 6.8849, 6.7886, 6.9305, 6.8354,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.0662, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.1111, 7.2443, 7.1563, 7.2884, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.7152, 7.8384, 7.7567,\n 7.6758, 7.5955, 7.7178, 7.8393, 7.7597, 7.6808, 7.8014, 7.7232,\n 7.6456, 7.7653, 7.6883, 7.8072, 7.7308, 7.6551, 7.7732, 7.8905,\n 8.0070, 7.9318, 8.0476, 7.9729, 7.8988, 8.0139, 8.1282, 8.0546,\n 7.9816, 8.0952, 8.0227, 8.1356, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.3268, 8.4371, 8.3660, 8.4757, 8.4050, 8.5141, 8.6226, 8.5524,\n 8.4826, 8.5905, 8.5212, 8.6284, 8.5595, 8.4911, 8.4232, 8.5298,\n 8.6359, 8.5683, 8.5012, 8.6066, 8.5399, 8.4736, 8.5785, 8.5126,\n 8.6169, 8.5513, 8.4862, 8.5899, 8.6932, 8.7959, 8.7311, 8.8333,\n 8.7689, 8.7048, 8.8065, 8.9077, 8.8439, 8.7805, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How is Nirvana achieved?\nContext: In Theravada Buddhism, the ultimate goal is the attainment of the sublime state of Nirvana, achieved by practicing the Noble Eightfold Path (also known as the Middle Way), thus escaping what is seen as a cycle of suffering and rebirth.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 1.0498, 1.2185, 1.1547, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.9972, 1.1500, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.2611, 1.4071, 1.3517, 1.4963, 1.4410, 1.3862,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.4427, 1.5731, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.4699, 1.4241, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.4846, 1.6042, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.3164, 8.4444, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.6436,\n 8.5553, 8.4679, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.2867, 9.3993, 9.3181, 9.4301,\n 9.3495, 9.2697, 9.1905, 9.1119, 9.2232, 9.3338, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.3212, 9.4299, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.9648, 10.0701, 9.9940, 9.9184, 10.0231, 9.9481, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.1981, 10.1262, 10.2273, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.7090, 10.8064, 10.7363, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What conflict overseen by President Polk might be the source of Tennessee's nickname?\nContext: This explanation is more likely, because President Polk's call for 2,600 nationwide volunteers at the beginning of the Mexican-American War resulted in 30,000 volunteers from Tennessee alone, largely in response to the death of Davy Crockett and appeals by former Tennessee Governor and now Texas politician, Sam Houston.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.3904, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 7.6667,\n 7.8355, 7.6751, 7.5186, 7.3659, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.5333, 7.4174, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 7.8428, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 8.9763, 9.0987, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.7473, 9.8632, 9.9783, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.3630, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.0102, 11.1151, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.7031, 11.6219, 11.5414, 11.4614, 11.5613, 11.4819, 11.4031,\n 11.3249, 11.4244, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.4581, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In Chinese Buddhism what meditation is more popular?\nContext: According to Routledge's Encyclopedia of Buddhism, in contrast, throughout most of Buddhist history before modern times, serious meditation by lay people has been unusual.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.0534, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.2603, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 7.9754, 8.1196, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.0323, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.3630, 10.4738, 10.3853, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.9377,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.9540, 12.8771, 12.9691, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did European sport clubs begin to form in the Ottoman empire?\nContext: The main sports Ottomans were engaged in were Turkish Wrestling, hunting, Turkish archery, horseback riding, Equestrian javelin throw, arm wrestling, and swimming.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.5820, -2.6458, -2.7080, -2.7689, -2.3570,\n -1.9630, -2.0381, -2.1111, -2.1822, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.2418, -2.2998, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.2186, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.1268, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.1783,\n -2.2197, -2.0642, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.1667,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188, 4.9010, 5.1711,\n 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855, 4.9652, 5.2085, 5.4444,\n 5.6737, 5.8966, 5.6921, 5.9106, 5.7155, 5.5277, 5.3468, 5.5626, 5.3886,\n 5.6000, 5.4322, 5.2697, 5.4772, 5.6805, 5.8797, 5.7229, 5.9186, 5.7664,\n 5.9588, 6.1477, 6.0000, 5.8560, 5.7155, 5.5783, 5.7646, 5.6307, 5.8140,\n 5.6830, 5.5549, 5.7354, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828,\n 6.3509, 6.5166, 6.3960, 6.2776, 6.1612, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.7242, 5.6220, 5.7812, 5.9386, 5.8377,\n 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.0041, 5.9084, 6.0587, 6.2075,\n 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354, 6.7414, 6.6486, 6.5569,\n 6.6973, 6.8364, 6.9743, 6.8834, 7.0201, 7.1556, 7.2900, 7.4233, 7.5556,\n 7.4655, 7.5967, 7.7268, 7.8558, 7.9839, 8.1111, 8.2372, 8.1481, 8.0598,\n 7.9724, 8.0976, 8.0111, 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381,\n 8.4532, 8.3691, 8.2858, 8.2032, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977,\n 8.3172, 8.2375, 8.3550, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.6556, 8.7681, 8.6921, 8.8039, 8.9151,\n 9.0257, 9.1357, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113, 9.4188, 9.3443,\n 9.2704, 9.1970, 9.3040, 9.4103, 9.5161, 9.4432, 9.3708, 9.4761, 9.5808,\n 9.6850, 9.7886, 9.8918, 9.8198, 9.9224, 9.8510, 9.9531, 9.8821, 9.9837,\n 9.9132, 9.8431, 9.7735, 9.7043, 9.6356, 9.7367, 9.8373, 9.9374, 9.8691,\n 9.9687, 9.9008, 9.8333, 9.9325, 9.8654, 9.7987, 9.7325, 9.8311, 9.9294,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What part of their motherboards does Dell not reveal the specifications of?\nContext: While motherboard power connections reverted to the industry standard in 2003, Dell continues to remain secretive about their motherboard pin-outs for peripherals (such as MMC readers and power on/off switches and LEDs).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "111", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "35.1%", + "z-score": "2.47", + "p value": "0.00683", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.3744, 1.2810, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.7450, 1.6667, 1.8543, 2.0397, 2.2226, 2.1436, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 1.8677, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.9262, 2.0954, 2.2629, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.0107, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.3552, 2.2884, 2.4444, 2.3779, 2.3120, 2.4660])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.4371, 2.3238, 2.5538, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.3706, 2.2743, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.9335, 2.1320, 2.0455, 1.9604, 2.1546, 2.3462, 2.5352, 2.7217,\n 2.6354, 2.8189, 2.7333, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.2717, 3.4429, 3.3587, 3.2757, 3.1937, 3.1129, 3.2806, 3.4466,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.4586, 3.3826, 3.3075, 3.2332, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.0989, 3.0282, 2.9582, 2.8889, 3.0415, 3.1928, 3.1236, 3.0551,\n 3.2044, 3.3526, 3.4995, 3.6452, 3.5762, 3.7205, 3.6519, 3.7947,\n 3.9365, 4.0771, 4.0085, 3.9404, 4.0795, 4.2176, 4.1498, 4.0825,\n 4.0158, 3.9497, 4.0859, 4.2212, 4.1552, 4.2893, 4.4224, 4.3566,\n 4.2914, 4.2267, 4.1625, 4.0988, 4.2301, 4.1667, 4.1038, 4.0415,\n 4.1713, 4.1092, 4.0476, 3.9865, 3.9258, 3.8655, 3.8057, 3.7463,\n 3.8741, 4.0011, 3.9418, 3.8829, 4.0087, 4.1338, 4.2582, 4.3818,\n 4.3226, 4.4454, 4.3865, 4.5083, 4.6295, 4.7500, 4.6911, 4.6325,\n 4.7520, 4.8709, 4.8125, 4.7544, 4.6968, 4.6395, 4.7572, 4.8742,\n 4.8170, 4.9333, 5.0489, 4.9918, 4.9351, 4.8787, 4.8227, 4.7670,\n 4.8815, 4.8260, 4.7709, 4.7161, 4.8295, 4.7749, 4.7206, 4.6667,\n 4.6130, 4.5596, 4.5066, 4.4538, 4.5659, 4.6775, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the highest order of species n land?\nContext: The climate was much more humid than the Triassic, and as a result, the world was very tropical.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.2111, -2.2528, -2.0948, -2.1367, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.7085, -1.7500, -1.7913, -1.8324, -1.6859,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -1.9906, -2.0282, -1.8935, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.3638, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.7387, 7.8699, 8.0000, 7.9079, 8.0370, 7.9460, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 7.9649, 7.8808, 7.7976, 7.7152, 7.8384, 7.9608,\n 7.8791, 8.0006, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.7831, 8.7033, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.3338, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.2450, 9.3537, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.6322, 9.7380, 9.6635, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.7574, 9.6850, 9.6130, 9.7167, 9.8198, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.6356, 9.5673, 9.6684, 9.7690, 9.7011, 9.8012, 9.9008, 10.0000,\n 10.0987, 10.1970, 10.2949, 10.2273, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did Darwin speculate might be how inheritable variations might come about in a species?\nContext: Darwin also admitted ignorance of the source of inheritable variations, but speculated they might be produced by environmental factors.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "79", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "21.5%", + "z-score": "-0.715", + "p value": "0.763", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.1232, 7.3051, 7.1317, 7.3113, 7.4878, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 8.9455, 9.0924, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.6630, 9.7989, 9.9333, 10.0664, 10.1983, 10.0673, 10.1982, 10.3280,\n 10.1999, 10.0737, 10.2030, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.4650, 10.3459, 10.4704, 10.3532, 10.4770, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.7348, 10.8542, 10.9727, 10.8616, 10.7518, 10.8699,\n 10.7616, 10.8790, 10.9955, 10.8889, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 11.8132, 11.9213, 11.8212, 11.7222, 11.8299, 11.7320, 11.8392,\n 11.9457, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 12.9011, 13.0000, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.4977, 13.5929, 13.6876, 13.5987, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.9784, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.1725, 14.0872, 14.0025, 13.9185, 14.0096, 14.1003,\n 14.0170, 13.9343, 14.0248, 14.1149, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: The environmental intervention was linked to the conceptualization of what process?\nContext: Between 1791 and 1833, Saint Helena became the site of a series of experiments in conservation, reforestation and attempts to boost rainfall artificially.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 1.1202, 1.3206, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 1.2072, 1.3590, 1.5097, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.6081, 1.5519, 1.4963, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.9979, 0.9497, 0.9017, 0.8540, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.2326, 1.1852, 1.3131, 1.4402, 1.5667,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.6378, 1.5916, 1.5457, 1.6667,\n 1.6208, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "193", + "Fraction of T in Greenlist": "97.0%", + "z-score": "23.5", + "p value": "6.41e-122", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.5534, 9.7043, 9.8532, 10.0000,\n 10.1449, 10.2879, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.6276, 11.7536, 11.8784, 12.0020, 12.1244,\n 12.2456, 12.3656, 12.4846, 12.6025, 12.7194, 12.8352, 12.9501, 13.0639,\n 13.1769, 13.2889, 13.4000, 13.5102, 13.6196, 13.7281, 13.8358, 13.9427,\n 14.0489, 14.1542, 14.2588, 14.3626, 14.4658, 14.5682, 14.6699, 14.7710,\n 14.8714, 14.9711, 15.0702, 15.1686, 15.2665, 15.3637, 15.4603, 15.5563,\n 15.6518, 15.7467, 15.8411, 15.9349, 16.0281, 16.1209, 16.2131, 16.3048,\n 16.3960, 16.4867, 16.5769, 16.6667, 16.7559, 16.8447, 16.9331, 17.0210,\n 17.1085, 17.1955, 17.2821, 17.3682, 17.4540, 17.5393, 17.6242, 17.7088,\n 17.7929, 17.8766, 17.9600, 18.0430, 18.1256, 18.2078, 18.2897, 18.3712,\n 18.4523, 18.5331, 18.6136, 18.6937, 18.7735, 18.8529, 18.9320, 19.0108,\n 19.0893, 19.1675, 19.2453, 19.3228, 19.4000, 19.4770, 19.5536, 19.6299,\n 19.7059, 19.7817, 19.8571, 19.9323, 20.0072, 20.0818, 20.1562, 20.2303,\n 20.3041, 20.3776, 20.4509, 20.5239, 20.5967, 20.6692, 20.7414, 20.8135,\n 20.8852, 20.9567, 21.0280, 21.0991, 21.1699, 21.2404, 21.3108, 21.3809,\n 21.4508, 21.5204, 21.5899, 21.6591, 21.7281, 21.7969, 21.8654, 21.9338,\n 22.0019, 22.0699, 22.1376, 22.2051, 22.2724, 22.3395, 22.4065, 22.4732,\n 22.5397, 22.6060, 22.6722, 22.7381, 22.8039, 22.8694, 22.9348, 23.0000,\n 23.0650, 23.1298, 23.1945, 23.2590, 23.3233, 23.3874, 23.4513])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How much of the Bronx vote did Hillquit get in 1917?\nContext: The only Republican to carry the Bronx since 1914 was Fiorello La Guardia in 1933, 1937 and 1941 (and in the latter two elections, only because his 30-32% vote on the American Labor Party line was added to 22-23% as a Republican).\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 1.1793, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.0889, 1.9795, 2.2133, 2.1054, 2.0000,\n 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.9335, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.2819, 2.4667, 2.3842, 2.3028, 2.2226, 2.4034, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.8773, 1.8116,\n 1.7467, 1.9066, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.9920, 1.9298, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.5758, 1.5191, 1.6646, 1.6081, 1.5519, 1.6958, 1.8385, 1.7823,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.8411, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.7384, 1.6859,\n 1.6337, 1.5818, 1.5303, 1.4792, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.5926, 1.7237, 1.8541, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 2.1634, 2.1131, 2.0631, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.8175, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.8252, 1.7780, 1.8999, 1.8527, 1.8058, 1.9267, 2.0470, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.8874, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.6339, 7.5234, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.4915, 12.5930, 12.5001, 12.6012, 12.7017,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 13.0226, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.1453,\n 13.0590, 12.9732, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.2791, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.8434, 13.7612, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is restricted unless the film has a traditional theater release?\nContext: Deaner further explained the matter in terms of the Australian film industry, stating: \"there are currently restrictions on quantities of tax support that a film can receive unless the film has a traditional cinema release.\"\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "29.2%", + "z-score": "0.788", + "p value": "0.215", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "154", + "Fraction of T in Greenlist": "77.4%", + "z-score": "17.1", + "p value": "1.31e-65", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 7.8113, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 12.9935, 13.0922, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.9585, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.4248, 14.5161, 14.6070, 14.6976, 14.7877, 14.6986, 14.7885,\n 14.8779, 14.9669, 15.0555, 15.1438, 15.2316, 15.3191, 15.4062, 15.4929,\n 15.5793, 15.4922, 15.5783, 15.6641, 15.7495, 15.8345, 15.9193, 16.0036,\n 16.0877, 16.1713, 16.2547, 16.3377, 16.2525, 16.3353, 16.4178, 16.5000,\n 16.5819, 16.6634, 16.7447, 16.8256, 16.9063, 16.9866, 17.0667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Along with Bermuda, what other English colony did many settlers come from?\nContext: Among these were free people of color, born in the West Indies of alliances and marriages between Africans and Englanders, when color lines were looser among the working class in the early colonial years, and some wealthy whites took black consorts or concubines.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "150", + "Fraction of T in Greenlist": "75.4%", + "z-score": "16.4", + "p value": "7.87e-61", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 5.4772, 5.7446, 6.0000, 6.2450, 6.4807, 6.7082, 6.9282,\n 7.1414, 7.3485, 7.5498, 7.7460, 7.9373, 8.1240, 8.3066, 8.4853,\n 8.6603, 8.8318, 9.0000, 8.7287, 8.4697, 8.2219, 8.3993, 8.5732,\n 8.3418, 8.5153, 8.2952, 8.0829, 8.2577, 8.4293, 8.2281, 8.0333,\n 8.2052, 8.3742, 8.5404, 8.3557, 8.5206, 8.6828, 8.8426, 9.0000,\n 9.1551, 8.9815, 9.1355, 9.2874, 9.4373, 9.2710, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.2424, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.4834, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.5963, 10.7246, 10.8518, 10.9777, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 11.9138, 11.7907, 11.9062, 12.0208,\n 12.1346, 12.2474, 12.3595, 12.2398, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.5646, 12.4491, 12.5583, 12.6667, 12.7743, 12.6611, 12.7683, 12.8749,\n 12.7634, 12.8696, 12.9750, 12.8653, 12.9704, 13.0748, 13.1785, 13.0707,\n 12.9641, 13.0677, 13.1707, 13.0656, 13.1681, 13.0644, 12.9616, 13.0639,\n 13.1657, 13.2669, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 13.9565, 14.0530, 14.1489, 14.0505, 13.9531, 14.0489,\n 14.1442, 14.2390, 14.3333, 14.2374, 14.3314, 14.4250, 14.3302, 14.2361,\n 14.3295, 14.2364, 14.3295, 14.4222, 14.5144, 14.6062, 14.5144, 14.6059,\n 14.6970, 14.6062, 14.6970, 14.7874, 14.8773, 14.9669, 15.0560, 15.1448,\n 15.2332, 15.1440, 15.2321, 15.3198, 15.2316, 15.3191, 15.2316, 15.3188,\n 15.4057, 15.4922, 15.5783, 15.4919, 15.5778, 15.6634, 15.5778, 15.6631,\n 15.7481, 15.8327, 15.9170, 16.0009, 16.0845, 16.1678, 16.0836, 16.1667,\n 16.0832, 16.1660, 16.0832, 16.1658, 16.2481, 16.3301, 16.4118])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How does aposematism help a species population?\nContext: While that particular prey organism may be killed, the coloring benefits the prey species as a whole.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.2778, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -1.1651, -1.0276, -1.0690,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.4856, 8.3324,\n 8.4884, 8.6423, 8.4936, 8.3480, 8.5010, 8.6522, 8.5105, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.1966, 11.0810, 11.1990, 11.3161, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.2124, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.6966, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.2360, 12.3419, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.3689, 13.2722, 13.3710, 13.4691, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.2882, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.4920, 13.5876, 13.4977, 13.5929, 13.5039, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.2539,\n 14.3449, 14.4355, 14.5257, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.2345, 15.1498, 15.2364, 15.3226,\n 15.2387, 15.1553, 15.2414, 15.3272, 15.2446, 15.1625, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What does confrontational scavenging involve doing to other predators after they've made a kill?\nContext: Robert Blumenschine proposed the idea of confrontational scavenging, which involves challenging and scaring off other predators after they have made a kill, which he suggests could have been the leading method of obtaining protein-rich meat by early humans.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.2626, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 7.8174, 7.6883, 7.5615, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.1564, 10.0577, 10.1754, 10.0779, 9.9813, 9.8858, 9.7912, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.7473, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 10.0504, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.7594, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.7787, 11.7000, 11.6220, 11.7200, 11.6425, 11.5655, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.5329, 11.4581, 11.3837, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Why were dogs initially selected?\nContext: Unlike other domestic species which were primarily selected for production-related traits, dogs were initially selected for their behaviors.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "39", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "20.5%", + "z-score": "-0.647", + "p value": "0.741", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -0.9366, -0.6472])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.5347, 8.6817, 8.5491, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.1455, 9.2828, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 9.8367, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.3709,\n 10.4932, 10.3812, 10.5027, 10.6232, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.2427, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.0516, 12.1568, 12.2615, 12.1652, 12.2694, 12.3729, 12.2778,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.7017,\n 12.8017, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 13.1122,\n 13.0226, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 13.8976,\n 13.9896, 13.9042, 13.9959, 13.9111, 14.0025, 14.0936, 14.0096, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.6534, 14.7406, 14.6599, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.9318, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what century was the church established at the location?\nContext: Construction of the present church began in 1245, on the orders of King Henry III.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 1.0659, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.9867, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 1.0954,\n 1.2285, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.1038, 1.2326, 1.1852, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.2982, 1.4241, 1.3771, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.1990, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.1157, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.4450, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.8673,\n 11.9669, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.0180, 11.9377,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the term that refers to areas where an antennas radiation is zero?\nContext: The radiation of many antennas shows a pattern of maxima or \"lobes\" at various angles, separated by \"nulls\", angles where the radiation falls to zero.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.1796, 0.3573, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.9488, 0.8889, 1.0507, 0.9909, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.7807, 0.9238, 0.8721, 0.8208, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.5927, 0.7177, 0.6737, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.6033, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 7.8699, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 7.8065, 7.9336, 7.8463, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 11.0165, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.5235, 11.4525, 11.5470, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did the abbot remain as a town built around the abbey?\nContext: The proximity of the Palace of Westminster did not extend to providing monks or abbots with high royal connections; in social origin the Benedictines of Westminster were as modest as most of the order.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.0711,\n 6.9759, 7.1152, 7.0211, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.4655, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.2413, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.5824, 8.5052, 8.6190, 8.7323, 8.6556, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 8.9502, 9.0601, 8.9851, 8.9107, 9.0200, 9.1287,\n 9.0548, 9.1629, 9.2704, 9.3774, 9.4837, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.8510, 9.7800, 9.7095, 9.8116, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.2050, 10.1363, 10.0679, 10.1667,\n 10.0987, 10.0312, 10.1295, 10.2273, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 9.0060, 8.9178, 8.8304, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.7908, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.6665, 10.7671, 10.8673, 10.7928,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 11.0414, 11.1392, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.2129, 11.3091, 11.4047, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What library was estimated to have 700,000 volumes?\nContext: The city of Pergamon also had a large library and became a major center of book production.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "69", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.487", + "p value": "0.313", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "154", + "Fraction of T in Greenlist": "77.4%", + "z-score": "17.1", + "p value": "1.31e-65", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.4983, 7.6800, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 7.8320, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.2488, 9.3956, 9.2424, 9.3881, 9.5321, 9.3834, 9.5263,\n 9.6676, 9.5230, 9.6632, 9.8020, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.7179, 10.8444,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.3196, 11.4388, 11.5570, 11.6743, 11.7907, 11.9062, 12.0208,\n 12.1346, 12.2474, 12.3595, 12.2398, 12.3514, 12.2336, 12.3447, 12.4550,\n 12.5646, 12.4491, 12.5583, 12.4444, 12.5531, 12.6611, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.3840, 13.4859, 13.3789, 13.4804, 13.5813, 13.4758, 13.5764, 13.6763,\n 13.5724, 13.6720, 13.7710, 13.6685, 13.7672, 13.8654, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.3483, 14.4433, 14.5379, 14.4381, 14.5324, 14.4338,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.8990, 14.8021, 14.8940, 14.9854,\n 14.8896, 14.9808, 15.0715, 15.1618, 15.2517, 15.3411, 15.4302, 15.5188,\n 15.6070, 15.6949, 15.6014, 15.6891, 15.5965, 15.6839, 15.7709, 15.8575,\n 15.7661, 15.8525, 15.7619, 15.8481, 15.9339, 15.8443, 15.9299, 16.0151,\n 16.1000, 16.1846, 16.2688, 16.3526, 16.4361, 16.5193, 16.6021, 16.5144,\n 16.5970, 16.5100, 16.5925, 16.6746, 16.5884, 16.6704, 16.7520, 16.6667,\n 16.7481, 16.8292, 16.7447, 16.8256, 16.9063, 16.9866, 17.0667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was Zhejiang formerly romanized as?\nContext: Zhejiang is bordered by Jiangsu province and Shanghai municipality to the north, Anhui province to the northwest, Jiangxi province to the west, and Fujian province to the south; to the east is the East China Sea, beyond which lie the Ryukyu Islands of Japan.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "39", + "# Tokens in Greenlist": "10", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.0925", + "p value": "0.463", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 2.8868,\n 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094, 2.6605, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284, 3.1177, 3.3968, 3.6667,\n 3.9279, 3.7524, 3.5839, 3.4219, 3.2660, 3.5176, 3.7626, 4.0012, 3.8490,\n 3.7017, 3.9337, 4.1603, 4.3818, 4.2378, 4.0980, 3.9620, 4.1779, 4.0451,\n 4.2563, 4.4634, 4.3333, 4.5363, 4.4091, 4.2848, 4.1633, 4.0446, 3.9284,\n 4.1260, 4.3205, 4.5118, 4.3970, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190,\n 4.9075, 5.0844, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.5630, 5.4610, 5.6220, 5.5213, 5.4222, 5.3245,\n 5.4832, 5.3867, 5.5435, 5.4482, 5.6032, 5.7566, 5.6622, 5.8139, 5.9641,\n 6.1128, 6.2601, 6.4059, 6.5504, 6.6935, 6.5997, 6.7414, 6.8819, 7.0211,\n 6.9282, 6.8364, 6.9743, 7.1110, 7.0201, 7.1556, 7.2900, 7.2001, 7.1111,\n 7.0231, 7.1563, 7.0692, 7.2012, 7.3322, 7.4622, 7.5912, 7.7192, 7.6328,\n 7.5472, 7.4625, 7.3786, 7.5056, 7.6315, 7.7566, 7.6734, 7.5910, 7.7152,\n 7.8384, 7.9608, 7.8791, 7.7981, 7.7178, 7.6383, 7.7597, 7.8803, 7.8014,\n 7.9212, 7.8429, 7.7653, 7.6883, 7.6120, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.7407, 7.8571, 7.9729, 8.0880, 8.2024, 8.1282, 8.2420, 8.1683,\n 8.0952, 8.0227, 8.1356, 8.2479, 8.3595, 8.4706, 8.5810, 8.6908, 8.6186,\n 8.5469, 8.4757, 8.5848, 8.5141, 8.4439, 8.5524, 8.4826, 8.5905, 8.5212,\n 8.6284, 8.7351, 8.6662, 8.7724, 8.8780, 8.9830, 9.0876, 9.1916, 9.2952,\n 9.3982, 9.3295, 9.4320, 9.5341, 9.6356, 9.7367, 9.6684, 9.6005, 9.5331,\n 9.6336, 9.5666, 9.6667, 9.7663, 9.6996, 9.6334, 9.5675, 9.6666, 9.6011,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who did the children work beside?\nContext: In many cases, men worked from home.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.3504, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.6437, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.6196, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.0493, 11.9730, 11.8973, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.6217, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.9273, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was the Bishop in this time frame?\nContext: Construction of the present church began in 1245, on the orders of King Henry III.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.0000, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.8866, 1.0106, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "71.4%", + "z-score": "14.8", + "p value": "4.46e-50", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.0456, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.3615, 10.4846, 10.3709,\n 10.4932, 10.3812, 10.2706, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.6894, 11.7992, 11.6966, 11.8058,\n 11.7045, 11.6041, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.2615, 12.3655, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 12.8877, 12.9874, 12.8942,\n 12.8017, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 13.9735, 14.0660, 13.9784, 14.0707, 13.9838, 13.8976,\n 13.8120, 13.9042, 13.9959, 14.0872, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.3642, 14.4536, 14.5426, 14.6313, 14.7195, 14.8074, 14.8950, 14.9821,\n 15.0689, 14.9860, 15.0726, 14.9903, 15.0766, 14.9950, 14.9139, 14.8333])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Analysis by what organization detailed that municipality-based rankings may be inaccurate?\nContext: However, an analysis by the Regional Data Cooperative for Greater New Haven, Inc., has shown that due to issues of comparative denominators and other factors, such municipality-based rankings can be considered inaccurate.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 0.9671, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.3166, 1.2611, 1.2060, 1.1514, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.4792, 1.4284, 1.5617, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.6732, 1.6230, 1.5731, 1.5236, 1.6530, 1.6036,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.8175, 1.9419, 1.8935, 1.8453, 1.9686, 1.9206, 2.0430,\n 1.9950, 2.1167, 2.0688, 2.0212, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.8605, 1.8145, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.9874, 5.8835, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.0474, 5.9491, 6.1012, 6.0041, 5.9084,\n 5.8139, 5.7207, 5.6286, 5.7785, 5.9270, 6.0740, 5.9827, 6.1283,\n 6.0380, 5.9488, 6.0927, 6.0044, 5.9172, 6.0596, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.8019, 6.9361, 7.0692, 7.2012,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.4193, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.7152, 7.6335, 7.5526,\n 7.6758, 7.7981, 7.7178, 7.6383, 7.7597, 7.6808, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.7574, 9.6850, 9.7886, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.0547, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.5128, 10.4427, 10.5410, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 10.9564, 11.0521, 10.9829, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many trophy designs have there bee?\nContext: The FA decided to change the design after the 1909 winners, Manchester United, made their own replica, leading the FA to realise they did not own the copyright.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "22.2%", + "z-score": "-0.509", + "p value": "0.695", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.5092])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.0401, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.1754, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.1480, 11.2564, 11.1640, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 11.9737, 11.8870, 11.8010,\n 11.9024, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.7215, 13.8113, 13.9007, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the molecular weight loss of antibacterial compounds?\nContext: Compounds that are still isolated from living organisms are the aminoglycosides, whereas other antibacterials\u2014for example, the sulfonamides, the quinolones, and the oxazolidinones\u2014are produced solely by chemical synthesis.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.3797, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.6681, 2.5538, 2.7791, 2.6667,\n 2.8868, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 2.8830, 3.0796, 3.2733, 3.1754,\n 3.0793, 2.9848, 2.8919, 3.0806, 2.9887, 3.1743, 3.0833, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.6187, 3.5301, 3.4427, 3.6148,\n 3.5283, 3.4429, 3.6122, 3.7796, 3.9452, 3.8600, 4.0234, 4.1851,\n 4.1003, 4.0166, 4.1761, 4.3339, 4.4901, 4.4066, 4.3241, 4.4783,\n 4.3966, 4.3158, 4.2359, 4.1569, 4.3086, 4.2303, 4.1528, 4.3027,\n 4.4511, 4.3740, 4.2977, 4.4444, 4.3687, 4.5140, 4.4388, 4.5826,\n 4.7252, 4.8666, 5.0070, 5.1461, 5.2842, 5.4212, 5.3455, 5.2705,\n 5.1962, 5.1225, 5.0496, 4.9774, 4.9058, 5.0406, 5.1744, 5.3072,\n 5.4391, 5.5701, 5.7001, 5.8292, 5.9575, 6.0848, 6.0125, 5.9409,\n 6.0671, 6.1926, 6.3172, 6.2458, 6.1750, 6.2985, 6.2282, 6.3509,\n 6.2810, 6.4028, 6.5238, 6.6441, 6.7637, 6.6939, 6.8127, 6.9307,\n 7.0481, 7.1647, 7.0952, 7.2111, 7.3263, 7.4409, 7.5548, 7.4855,\n 7.5988, 7.7114, 7.6424, 7.7544, 7.6859, 7.7971, 7.7291, 7.8397,\n 7.7720, 7.7048, 7.8147, 7.7480, 7.6816, 7.6158, 7.5503, 7.4853,\n 7.4208, 7.3566, 7.4655, 7.4017, 7.5100, 7.4465, 7.3835, 7.3208,\n 7.4283, 7.5353, 7.4729, 7.5794, 7.6853, 7.7907, 7.8956, 7.8333,\n 7.9377, 7.8758, 7.8142, 7.9179, 7.8567, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What colleagues are best to work with to reach a consensus?\nContext: Gephardt added that \"inclusion and empowerment of the people on the line have to be done to get the best performance\" from the minority party.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.4105, 1.5423, 1.4923, 1.6230, 1.5731, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.8102, 1.7609, 1.7119, 1.6632, 1.7894, 1.9149,\n 2.0396, 1.9906, 1.9419, 2.0656, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.9473, 1.8999, 2.0212, 1.9738, 2.0943, 2.2141, 2.1667,\n 2.1195, 2.2384, 2.1913, 2.1444, 2.0979, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.4536, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 7.9472, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.1810, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.3616, 11.4638, 11.5655, 11.4829, 11.5841, 11.5022,\n 11.4209, 11.5217, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.5820, 12.6757, 12.5986,\n 12.5221, 12.6153, 12.7082, 12.6323, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.9247, 13.0157, 12.9410, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What city was FIFA formed?\nContext: FIFA, the international football body, was formed in Paris in 1904 and declared that they would adhere to Laws of the Game of the Football Association.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.1816, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.6430, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.2955, -0.3369, -0.3780, -0.2513, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856, 2.1939, 2.4910, 2.3333,\n 2.1822, 2.4659, 2.3190, 2.5924, 2.4495, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.5560, 2.7952, 3.0290, 2.9055, 3.1334, 3.3566,\n 3.2348, 3.4528, 3.6667, 3.5466, 3.4293, 3.6380, 3.8431, 3.7273, 3.9284,\n 4.1260, 4.0119, 4.2060, 4.3970, 4.2844, 4.1740, 4.3614, 4.5461, 4.4371,\n 4.6188, 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.2981, 5.4610, 5.6220, 5.5213, 5.4222, 5.5811,\n 5.7382, 5.6401, 5.7955, 5.9491, 5.8522, 6.0041, 6.1546, 6.0587, 5.9641,\n 6.1128, 6.2601, 6.1664, 6.3122, 6.4566, 6.3640, 6.5069, 6.6486, 6.5569,\n 6.4663, 6.6066, 6.7456, 6.6559, 6.7937, 6.9303, 6.8414, 6.9768, 7.1111,\n 7.0231, 6.9361, 7.0692, 7.2012, 7.1149, 7.2459, 7.3758, 7.2904, 7.4193,\n 7.5472, 7.4625, 7.3786, 7.5056, 7.6315, 7.5484, 7.6734, 7.7976, 7.7152,\n 7.8384, 7.9608, 7.8791, 7.7981, 7.9196, 8.0403, 7.9600, 8.0798, 8.1989,\n 8.1192, 8.2375, 8.3550, 8.2760, 8.1976, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.5052, 8.6190, 8.7323, 8.6556, 8.5796, 8.6921, 8.8039, 8.7284,\n 8.8396, 8.9502, 8.8752, 8.9851, 9.0944, 9.0200, 8.9461, 9.0548, 9.1629,\n 9.0895, 9.0167, 9.1242, 9.2311, 9.3374, 9.2651, 9.1932, 9.2990, 9.4042,\n 9.3328, 9.4375, 9.3665, 9.4707, 9.4002, 9.5038, 9.4338, 9.3642, 9.2952,\n 9.2265, 9.1584, 9.2613, 9.1936, 9.1262, 9.2287, 9.3306, 9.2637, 9.1971,\n 9.2986, 9.2324, 9.1667, 9.2676, 9.2022, 9.1372, 9.2376, 9.1730, 9.2729,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which philosopher described the invention of a belt drive?\nContext: Yang Xiong described the invention of the belt drive for a quilling machine, which was of great importance to early textile manufacturing.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -0.8729,\n -0.9233, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.4019, -1.2623, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.2700, -1.3088, -1.1790, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.0991, 9.0134, 8.9285, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.8995, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 10.8686,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.2630, 11.3608, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.4533, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many alumni does Olin Business School have worldwide?\nContext: Olin has a network of more than 16,000 alumni worldwide.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.6025, 0.8513, 1.0948, 1.0000,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.2599, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.6166, 1.5511, 1.4863, 1.6498, 1.8116,\n 1.9720, 2.1309, 2.2884, 2.2222, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 2.0548, 1.9920, 1.9298, 1.8682, 1.8071, 1.7465, 1.8974,\n 1.8370, 1.9863, 1.9261, 1.8665, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.9242, 2.0682, 2.0101, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.8676, 2.0078, 1.9518, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.1980, 2.3333, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.2188, 2.1656, 2.1128, 2.2436, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.3443, 2.2923, 2.2406, 2.1892, 2.1381,\n 2.0873, 2.0369, 1.9868, 1.9370, 1.8875, 1.8383, 1.9640, 2.0889,\n 2.2132, 2.1637, 2.1145, 2.2377, 2.3603, 2.3110, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.1418, 2.0943, 2.0470, 2.1667,\n 2.1195, 2.0726, 2.1913, 2.3094, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.3886, 5.2204, 5.4322, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.8113, 7.9530, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.1324, 11.2414, 11.1480, 11.0554, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 12.0218, 11.9319,\n 12.0345, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.7581, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.5329, 13.4477, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.6514, 13.5683, 13.6604, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.9427, 13.8613, 13.7803, 13.6999, 13.6201,\n 13.7106, 13.6313, 13.5526, 13.4744, 13.5647, 13.6546, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the goal of the 2002 Philippines deployment?\nContext: The goal of the program was to provide medical care and services to the region of Basilan as part of a \"Hearts and Minds\" program.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.0445,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.0037, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 4.9373, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 7.8074, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.2151, 8.3463, 8.2488, 8.1524,\n 8.0571, 7.9630, 7.8699, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.5338, 9.6484, 9.5620, 9.4763, 9.3915, 9.3074,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.1695, 10.0910, 10.1968, 10.1189, 10.0416,\n 9.9648, 9.8887, 9.8131, 9.9184, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.0814, 10.1846, 10.1106, 10.0371, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which band was he largest benefit concert in history about?\nContext: Performers, including Def Leppard, Robert Plant, Guns N' Roses, Elton John, David Bowie, George Michael, Annie Lennox, Seal, Extreme, and Metallica performed various Queen songs along with the three remaining Queen members (and Spike Edney.)\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.2722, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.2596, -0.1295, -0.1721, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.4909, 5.3825, 5.2760, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.7242, 5.8835, 6.0410, 5.9386,\n 5.8377, 5.9932, 6.1471, 6.0474, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.0587, 5.9641, 5.8707, 5.7785, 5.6875, 5.5976, 5.5088, 5.6569,\n 5.5690, 5.7155, 5.8606, 5.7735, 5.6874, 5.6023, 5.7457, 5.8878,\n 6.0288, 5.9442, 5.8605, 5.7778, 5.9171, 6.0553, 6.1924, 6.1101,\n 6.0287, 6.1644, 6.0837, 6.0038, 6.1382, 6.0590, 6.1923, 6.1137,\n 6.2459, 6.3770, 6.5072, 6.6365, 6.5583, 6.4807, 6.6089, 6.7361,\n 6.6591, 6.7854, 6.9107, 7.0353, 6.9587, 7.0823, 7.0063, 6.9310,\n 7.0537, 7.1755, 7.2966, 7.2217, 7.3419, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673,\n 7.7949, 7.9091, 8.0227, 7.9507, 8.0636, 8.1758, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.5469, 8.6560, 8.7646, 8.6933, 8.8013, 8.9087,\n 8.8379, 8.9447, 9.0510, 9.1567, 9.2619, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.4338, 9.3642, 9.4673, 9.5698, 9.6719, 9.6028, 9.5341,\n 9.4658, 9.5673, 9.6684, 9.6005, 9.5331, 9.6336, 9.7337, 9.6667,\n 9.7663, 9.6996, 9.6334, 9.7325, 9.6666, 9.6011, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: The most popular early Marvel heroes were collectively and colloquially known as what?\nContext: While no other Timely character would achieve the success of these \"big three\", some notable heroes\u2014many of which continue to appear in modern-day retcon appearances and flashbacks\u2014include the Whizzer, Miss America, the Destroyer, the original Vision, and the Angel.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.2514, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.0265, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.0068, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -1.8732,\n -1.9137, -1.9540, -1.9941, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.1381,\n -2.1762, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.0656, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.0212, -2.0578, -2.0943, -2.1306, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.6418, 8.7758, 8.9086, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 10.0577, 9.9601, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.8025, 10.7141, 10.8224, 10.9301, 10.8426, 10.7559, 10.8631, 10.7772,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.8019, 12.7226, 12.8165, 12.7378, 12.8313, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Did everyone endorse Gaddafi?\nContext: Gaddafi remained the government's public face, with the identities of the other RCC members only being publicly revealed on 10 January 1970.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, 0.0000, -0.0861, 0.1703, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.4606, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.1514, 1.0973, 1.2423, 1.3862,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.7970, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.6632, 1.7894, 1.9149,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.0170, 1.9686, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 2.0000,\n 1.9533, 1.9068, 2.0259, 2.1444, 2.2624, 2.2156, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.7473, 9.8632, 9.9783, 9.8877, 9.7980,\n 9.7091, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.7594, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.7242, 10.8282, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.5655, 11.6632,\n 11.5868, 11.6840, 11.6082, 11.7050, 11.6297, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.6102, 12.7017, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What type of lanterns are used outside elevators as well as inside most cabs?\nContext: The former are almost universal in cab interiors with more than two stops and may be found outside the elevators as well on one or more of the floors.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.5744,\n -2.6143, -2.6540, -2.6934, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -3.0677, -3.1038,\n -3.1396, -2.9938, -2.8490, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -3.0997, -3.1347, -3.1696, -3.2043, -3.2389, -3.2733, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.6%", + "z-score": "12.9", + "p value": "2.78e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.5743, 9.4685, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.0380,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.3695, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.8025, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.2001, 11.3043, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.6219, 11.7217, 11.8210, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.7980, 11.7200, 11.6425, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.0493, 12.1447, 12.0685, 11.9928, 11.9176,\n 11.8429, 11.9380, 11.8638, 11.9586, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.7928, 12.8836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who covers incorporated associations or councils?\nContext: Furthermore, they operate across a multitude of domains and industries, from health, employment, disability and other human services to local sporting clubs, credit unions and research institutes.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.6524, 1.8257, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.6498, 1.8116,\n 1.7467, 1.9066, 1.8419, 1.7778, 1.7143, 1.8716, 1.8084, 1.9640,\n 2.1182, 2.0548, 2.2074, 2.1442, 2.2952, 2.2323, 2.3817, 2.3190,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.3891, 2.3293, 2.4717, 2.4121, 2.5532, 2.6933, 2.8324, 2.7724,\n 2.7129, 2.6540, 2.5954, 2.5373, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.3891, 2.5238, 2.6576, 2.6014, 2.7341, 2.6781, 2.6224,\n 2.5672, 2.6984, 2.6433, 2.7735, 2.9029, 2.8478, 2.9761, 2.9212,\n 3.0486, 2.9938, 3.1203, 3.0657, 3.1912, 3.1368, 3.0827, 3.0290,\n 2.9756, 2.9225, 2.8698, 2.9935, 3.1166, 3.0638, 3.1860, 3.1334,\n 3.2547, 3.3754, 3.4954, 3.4427, 3.3902, 3.3381, 3.2863, 3.2348,\n 3.1836, 3.3020, 3.2509, 3.2002, 3.1497, 3.0995, 3.2167, 3.3333,\n 3.2831, 3.3990, 3.3489, 3.2991, 3.2496, 3.3645, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 8.9314, 9.0582,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.0554, 10.9637, 11.0724, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.7672, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.7226, 12.6439, 12.7378, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where is the Universidad Tecnologica located?\nContext: In addition, the prestigious University of California maintains a campus known as \"Casa de California\" in the city.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "154", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "18.2%", + "z-score": "-1.95", + "p value": "0.975", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.9540])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.7006, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.5970, 6.7416, 6.8849, 6.7886, 6.9305, 6.8354,\n 6.7414, 6.6486, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.3333, 7.4655, 7.5967, 7.7268, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 7.8463, 7.7598, 7.8859, 7.8003,\n 7.7155, 7.8406, 7.7566, 7.8808, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.8791, 8.0006, 7.9196, 8.0403, 8.1602, 8.2793, 8.1989, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.3143, 8.4303, 8.3525, 8.2754,\n 8.3906, 8.3140, 8.4286, 8.3526, 8.2772, 8.2024, 8.3162, 8.4293,\n 8.5417, 8.4674, 8.5792, 8.6903, 8.8008, 8.7270, 8.6537, 8.7636,\n 8.8728, 8.9815, 8.9086, 8.8364, 8.9444, 8.8726, 8.8013, 8.9087,\n 8.8379, 8.9447, 8.8744, 8.8045, 8.7351, 8.8413, 8.9469, 9.0520,\n 8.9830, 9.0876, 9.1916, 9.2952, 9.2265, 9.1584, 9.2613, 9.3638,\n 9.4658, 9.3980, 9.3306, 9.4321, 9.3651, 9.2986, 9.3995, 9.3333,\n 9.4338, 9.3680, 9.3026, 9.2376, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is Torrence's 1989 theory about that ties into tool kit variability ?\nContext: Using temperature as a proxy for risk, Collard et al.'s results suggest that environments with extreme temperatures pose a threat to hunter-gatherer systems significant enough to warrant increased variability of tools.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.0911, -0.8660,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.4364,\n -0.2716, -0.1081, 0.0538, 0.0000, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.2010, 0.3504, 0.4988, 0.6460, 0.5941,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.7493,\n 0.8868, 1.0235, 1.1593, 1.1094, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 1.1038, 1.2326, 1.3607, 1.4881, 1.6148, 1.5667,\n 1.6925, 1.8175, 1.7693, 1.7213, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.4699, 1.4241, 1.5457, 1.6667,\n 1.7870, 1.9068, 2.0259, 1.9795, 2.0979, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.0451, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.2426, 4.4374, 4.3205,\n 4.5118, 4.3970, 4.2844, 4.4721, 4.3614, 4.5461, 4.4371, 4.6188,\n 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 5.0332, 5.1978, 5.0990, 5.2615, 5.1640,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.6986, 5.6032, 5.7566, 5.6622,\n 5.8139, 5.7207, 5.8707, 6.0193, 5.9270, 6.0740, 6.2197, 6.1283,\n 6.2725, 6.1820, 6.3248, 6.2354, 6.3768, 6.5169, 6.4283, 6.5672,\n 6.7049, 6.8414, 6.9768, 6.8889, 7.0231, 6.9361, 7.0692, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.7598, 7.8859, 7.8003,\n 7.9254, 7.8406, 7.9649, 7.8808, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 8.8294, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.5896, 9.6948, 9.6214,\n 9.5485, 9.4761, 9.4042, 9.5089, 9.6130, 9.5416, 9.6452, 9.5743,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What conflict was supposed to have provided Iraqi forces with battle experience?\nContext: Iraqi forces were battle-hardened after 8 years of war with Iran, and they were well equipped with late model Soviet tanks and jet fighters, but the antiaircraft weapons were crippled; in comparison, the US had no large-scale combat experience since its withdrawal from Vietnam nearly 20 years earlier, and major changes in US doctrine, equipment and technology since then had never been tested under fire.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.4944, 0.4481, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 7.0200, 7.2296, 7.4333, 7.1393, 7.3435, 7.5425,\n 7.2746, 7.4730, 7.2222, 6.9830, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.3557, 8.5206, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 9.9540, 10.0915, 9.9454, 9.8020, 9.6612, 9.5229, 9.6612, 9.7980,\n 9.9333, 9.7989, 9.9333, 9.8015, 9.9351, 9.8058, 9.6786, 9.8116,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.5893, 10.7125, 10.8347, 10.7164, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.3189, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 12.1107, 12.2202,\n 12.3289, 12.2207, 12.3289, 12.4365, 12.5434, 12.6496, 12.5434, 12.4383,\n 12.3343, 12.4405, 12.5460, 12.4434, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.6592, 12.5604, 12.6635, 12.5657, 12.4689, 12.5717, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.8819, 12.9820, 13.0815, 12.9874, 13.0866,\n 13.1852, 13.2834, 13.3810, 13.2882, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.7772, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 14.1543, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.8804, 14.7939, 14.7079, 14.7966,\n 14.8849, 14.7998, 14.7152, 14.6313, 14.7195, 14.8074, 14.8950, 14.8119,\n 14.8991, 14.8167, 14.9037, 14.8219, 14.7406, 14.8274, 14.7468, 14.8333,\n 14.9195, 15.0054, 15.0909, 15.1761, 15.2609, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the average number of people in a Plymouth household?\nContext: From the 2011 Census, the Office for National Statistics published that Plymouth's unitary authority area population was 256,384; 15,664 more people than that of the last census from 2001, which indicated that Plymouth had a population of 240,720.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.5", + "p value": "0.933", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.5479, -1.5945, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.2325, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.1862, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.9861, 8.8889, 8.7927, 8.9178, 8.8227, 8.9469,\n 9.0702, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 10.0261, 9.9373, 10.0504, 9.9625, 9.8753, 9.7890, 9.9015,\n 10.0133, 9.9278, 9.8430, 9.9542, 10.0647, 9.9807, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.3805, 10.3020, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.2924, 11.2164, 11.3150,\n 11.2396, 11.3378, 11.2630, 11.3608, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.5033, 11.4300, 11.3572, 11.4533, 11.5489, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.7833, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who wrote in their will that they received loving care from the emperor in the east?\nContext: The Tai Situpa is even supposed to have written in his will: \"In the past I received loving care from the emperor in the east.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.1167, -2.1532, -2.1896, -2.2258, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.3638, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.2012, 8.1111, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.7439, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.5840, 9.5066, 9.4299, 9.5381, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.0231, 10.1273, 10.0523, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.3280, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.9480, 10.8770, 10.9740, 10.9034, 11.0000,\n 10.9299, 10.8602, 10.7910, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In addition to social and cultural characteristics, what else is taken into account for race classification in the US census?\nContext: \" OMB defines the concept of race as outlined for the U.S. Census as not \"scientific or anthropological\" and takes into account \"social and cultural characteristics as well as ancestry\", using \"appropriate scientific methodologies\" that are not \"primarily biological or genetic in reference.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.19", + "p value": "0.575", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 7.9495, 8.0829, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.0370, 7.9460, 7.8558,\n 7.7667, 7.8948, 7.8065, 7.9336, 7.8463, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.7033, 8.8179, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.4299, 9.5381, 9.6456, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.2872, 10.2132, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.4525, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many men were in Napoleon's army when the battle began?\nContext: Bonaparte began with an army of 13,000 men; 1,500 were reported missing, 1,200 died in combat, and thousands perished from disease\u2014mostly bubonic plague.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.2730, -1.3131, -1.1784, -1.0445,\n -1.0849, -1.1251, -0.9925, -0.8607, -0.9012, -0.7703, -0.8109, -0.6810,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.8856,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.0825, 3.9614, 3.8431, 4.0446, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.2590, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.6770, 8.7978, 8.9178, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.0389, 9.9542, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 11.0004, 11.1018, 11.2028, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.1702, 11.0937, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many paper cups are used by Americans each year?\nContext: Americans also use on the order of 16 billion paper cups per year.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.4444, 10.5623, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 11.9650, 11.8704, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.3063, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.4286, 12.5289, 12.6287, 12.7279, 12.8267, 12.7376,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 12.8661, 12.9628,\n 13.0590, 12.9732, 13.0690, 13.1644, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.4661, 13.3829, 13.4758, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.5881, 13.6796, 13.7706, 13.6896, 13.7803, 13.8707, 13.9606,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.1573, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Do the roots and shoots need each other?\nContext: Roots that spread out close to the surface, such as those of willows, can produce shoots and ultimately new plants.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.7935, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.0779, 4.9373, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.0401, 6.9294, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.5715, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.9752, 8.8860, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.6186, 9.5346, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.2790, 10.3827, 10.3065, 10.2310, 10.3341,\n 10.4367, 10.3617, 10.4638, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.2607, 11.1883, 11.1164, 11.0450, 10.9740, 11.0705, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Why do people say KInseys work is not correct?\nContext: Kinsey's methods have been criticized as flawed, particularly with regard to the randomness of his sample population, which included prison inmates, male prostitutes and those who willingly participated in discussion of previously taboo sexual topics.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.1651, -1.0276, -1.0690,\n -1.1103, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 8.1684, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.6612, 9.7989, 9.9352, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.0664, 10.1983, 10.3288, 10.4581, 10.3280,\n 10.4565, 10.3287, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.8321,\n 10.7098, 10.8327, 10.9546, 10.8347, 10.9559, 10.8379, 10.9585, 11.0780,\n 10.9621, 10.8477, 10.9669, 11.0851, 10.9727, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.3333, 11.4471, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.6894, 11.5866, 11.6966, 11.8058,\n 11.9144, 11.8132, 11.9213, 12.0286, 11.9288, 11.8299, 11.9370, 12.0433,\n 11.9457, 11.8491, 11.9551, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 11.8937, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.7395,\n 11.8427, 11.7543, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.2542, 13.1746, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: At least how many American aircraft were shot down between 1945 and 1948?\nContext: Stalin was opposed to these provocations, as he felt the USSR unready to face the West in open war so soon after the losses of World War II and at the time when US had operational nuclear weapons whereas USSR had yet to conduct its first test.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.8047, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.4540, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.3884, 0.3443, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.7177, 0.8422, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.8682, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.0883, 10.9917, 11.1026, 11.2127, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.3497, 11.2564, 11.3642, 11.2719, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.2142, 11.3204, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.5264, 12.6234, 12.5401, 12.6367, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the electrically insulting material that sheaths a-delta fiber?\nContext: A-delta fibers is described as sharp and is felt first.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.3%", + "z-score": "0.0821", + "p value": "0.467", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -0.9949, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.2872, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.2158, 0.3443, 0.3004, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.3894, 4.2563, 4.4634, 4.3333,\n 4.5363, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 5.9438, 5.8275, 5.7133, 5.8812, 6.0469, 5.9346, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.4233, 7.5556, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.3605, 8.4788, 8.5964, 8.5153,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.3212, 9.4299, 9.5381, 9.6456, 9.5695, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.4837, 9.4103, 9.3374, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.2993, 10.2273, 10.3280, 10.2565, 10.1855, 10.1149, 10.2151,\n 10.3148, 10.2447, 10.1750, 10.1058, 10.2050, 10.1363, 10.2350, 10.3333,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.3248, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What percentage of the population is of the Rakhine descendant line ?\nContext: The Rakhine people constitute 4% of the population.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.2949, -0.1469, -0.1952, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.2988, -0.1703,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.3570, 2.1939, 2.0381, 1.8889,\n 1.7457, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 2.1831, 2.0605, 2.3113, 2.5560, 2.7952, 3.0290, 2.9055, 2.7852, 2.6681,\n 2.5538, 2.4422, 2.3333, 2.2269, 2.4495, 2.6679, 2.5621, 2.7757, 2.6713,\n 2.5690, 2.4689, 2.6765, 2.8808, 2.7811, 2.9814, 3.1787, 3.3729, 3.5642,\n 3.4641, 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 2.8983, 3.0833, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.8292, 3.0071, 2.9231, 3.0984, 3.2717,\n 3.4429, 3.6122, 3.5277, 3.4442, 3.3619, 3.2806, 3.2004, 3.1211, 3.0429,\n 3.2077, 3.3708, 3.5322, 3.4538, 3.3764, 3.2998, 3.4586, 3.6159, 3.5396,\n 3.6950, 3.8490, 4.0016, 3.9253, 4.0762, 4.2258, 4.3740, 4.5210, 4.4444,\n 4.5899, 4.5140, 4.6580, 4.8008, 4.9424, 5.0829, 5.2223, 5.1461, 5.0707,\n 5.2086, 5.3455, 5.4813, 5.6160, 5.7498, 5.6743, 5.8069, 5.9386, 6.0693,\n 5.9941, 5.9196, 5.8458, 5.9752, 6.1036, 6.2312, 6.3580, 6.4838, 6.4101,\n 6.3369, 6.2644, 6.3892, 6.5130, 6.6361, 6.7584, 6.8799, 7.0007, 6.9282,\n 6.8563, 6.9762, 7.0952, 7.2136, 7.1421, 7.0711, 7.1886, 7.3054, 7.2348,\n 7.3508, 7.4662, 7.5809, 7.5106, 7.6246, 7.7380, 7.6681, 7.7808, 7.7114,\n 7.8233, 7.7544, 7.8657, 7.9764, 8.0865, 8.0178, 7.9497, 8.0591, 7.9913,\n 8.1001, 8.2084, 8.1410, 8.2486, 8.3557, 8.2887, 8.3952, 8.5012, 8.6066,\n 8.5399, 8.6448, 8.7492, 8.8531, 8.7867, 8.7207, 8.8240, 8.7584, 8.8612,\n 8.9635, 9.0653, 9.0000, 8.9351, 8.8706, 8.9718, 9.0726, 9.1730, 9.2729,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How high do the mountains get in Mexico City's region?\nContext: Mexico City is located in the Valley of Mexico, sometimes called the Basin of Mexico.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.5323, 1.4317, 1.6667,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.4804, 2.3851, 2.5873, 2.7863, 2.6914, 2.5981,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.7107, 2.6222, 2.5352, 2.7217,\n 2.6354, 2.8189, 3.0000, 3.1789, 3.3556, 3.2686, 3.1829, 3.3566,\n 3.2717, 3.4429, 3.3587, 3.2757, 3.4442, 3.3619, 3.5282, 3.6927,\n 3.6107, 3.5298, 3.4498, 3.3708, 3.5322, 3.4538, 3.6133, 3.5355,\n 3.4586, 3.6159, 3.5396, 3.6950, 3.8490, 4.0016, 4.1528, 4.0762,\n 4.0004, 4.1497, 4.0745, 4.2222, 4.1475, 4.0736, 4.2196, 4.1461,\n 4.2907, 4.4341, 4.3609, 4.2885, 4.2167, 4.1457, 4.2870, 4.2164,\n 4.3564, 4.2862, 4.2167, 4.3552, 4.2861, 4.4234, 4.5596, 4.6949,\n 4.8291, 4.7599, 4.6912, 4.8242, 4.7559, 4.8878, 4.8200, 4.7527,\n 4.8833, 4.8164, 4.9460, 5.0747, 5.0080, 4.9419, 4.8763, 4.8113,\n 4.9385, 4.8737, 5.0000, 4.9356, 4.8717, 4.9969, 4.9333, 5.0576,\n 5.1810, 5.3038, 5.4257, 5.3621, 5.2989, 5.4199, 5.3571, 5.4772,\n 5.4147, 5.3526, 5.4718, 5.4100, 5.5284, 5.6462, 5.5846, 5.5234,\n 5.4626, 5.4023, 5.5189, 5.4588, 5.5747, 5.5149, 5.4554, 5.5705,\n 5.5113, 5.6256, 5.7394, 5.8525, 5.9651, 5.9059, 5.8470, 5.9588,\n 5.9002, 6.0113, 5.9530, 5.8951, 6.0054, 5.9477, 6.0575, 6.1667,\n 6.1091, 6.0519, 5.9950, 5.9385, 6.0468, 5.9905, 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.5593, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.6035, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.6770, 8.5896, 8.5030, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.5986,\n 12.6918, 12.7847, 12.8771, 12.9691, 12.8928, 12.9845, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.1063, 13.1966, 13.2864, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the former 10-year moratorium on the construction of nuclear plants the result of?\nContext: The former ten-year moratorium on the construction of new nuclear power plants was the result of a citizens' initiative voted on in 1990 which had passed with 54.5%\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.4%", + "z-score": "-1.81", + "p value": "0.965", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.2857, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.8145])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 4.6667, 4.3235, 4.0119, 4.3231, 4.0415, 3.7808, 4.0825,\n 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998, 3.1177, 2.9439, 2.7778,\n 3.0551, 2.8947, 3.1623, 3.0072, 2.8577, 3.1156, 2.9704, 3.2205, 3.0792,\n 3.3221, 3.1844, 3.4207, 3.2863, 3.5165, 3.7417, 3.9620, 4.1779, 4.0451,\n 3.9158, 3.7897, 4.0000, 3.8765, 3.7559, 3.6380, 3.8431, 3.7273, 3.6141,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.3824, 3.5777, 3.7700, 3.9595, 3.8552,\n 4.0415, 4.2251, 4.4061, 4.3026, 4.2008, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.8667, 4.7683, 4.6715, 4.5760, 4.7419, 4.6476, 4.8113,\n 4.9731, 5.1332, 5.2915, 5.4482, 5.6032, 5.5090, 5.6622, 5.5691, 5.7207,\n 5.8707, 6.0193, 6.1664, 6.0740, 5.9827, 5.8926, 6.0380, 5.9488, 5.8606,\n 5.7735, 5.9172, 5.8310, 5.7457, 5.8878, 5.8034, 5.7199, 5.8605, 6.0000,\n 6.1383, 6.2755, 6.4116, 6.5465, 6.6804, 6.5970, 6.7298, 6.6471, 6.7788,\n 6.9094, 7.0391, 7.1678, 7.0857, 7.0043, 6.9237, 7.0513, 6.9714, 6.8922,\n 6.8138, 6.9402, 6.8624, 6.7854, 6.9107, 6.8343, 6.7585, 6.8828, 7.0063,\n 7.1291, 7.2510, 7.3721, 7.4924, 7.6120, 7.5364, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 7.9729, 7.8988, 7.8253, 7.9403, 7.8673, 7.7949,\n 7.7230, 7.8372, 7.7658, 7.6950, 7.8084, 7.7380, 7.6681, 7.7808, 7.8928,\n 8.0042, 8.1150, 8.2252, 8.3349, 8.4439, 8.3742, 8.4826, 8.4133, 8.5212,\n 8.6284, 8.7351, 8.8413, 8.7724, 8.7039, 8.6359, 8.7414, 8.6738, 8.6066,\n 8.5399, 8.6448, 8.5785, 8.5126, 8.6169, 8.5513, 8.4862, 8.5899, 8.6932,\n 8.7959, 8.8982, 9.0000, 9.1013, 9.2022, 9.1372, 9.2376, 9.1730, 9.2729,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How far is to the neighboring capitol of Canelones?\nContext: The approximate distances to the neighbouring department capitals by road are, 90 kilometres (56 mi) to San Jose de Mayo (San Jose Department) and 46 kilometres (29 mi) to Canelones (Canelones Department).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.6660, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.1613, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.2517, -2.2892, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.1264, -1.9906, -2.0282, -2.0656, -1.9311, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.7780, -1.6466, -1.6843, -1.5539, -1.5916, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.3198, 1.5275, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.4027, 1.3308, 1.5119, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.7552, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 2.0785, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.6222, 1.7767, 1.9298, 1.8682, 1.8071, 1.9582, 2.1082,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.6081, 1.5519, 1.6958, 1.6398, 1.5842,\n 1.7264, 1.8676, 1.8119, 1.9518, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.6859,\n 1.6337, 1.7679, 1.7158, 1.6641, 1.6127, 1.7454, 1.8773, 1.8257,\n 1.9566, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 2.0105, 1.9599,\n 1.9097, 2.0369, 2.1634, 2.2892, 2.2387, 2.1884, 2.1385, 2.2630,\n 2.3868, 2.3368, 2.4597, 2.5820, 2.7036, 2.8245, 2.9448, 2.8943,\n 3.0138, 3.1327, 3.0821, 3.0317, 2.9817, 3.0995, 3.0496, 3.0000,\n 2.9507, 3.0674, 3.1836, 3.2991, 3.4142, 3.5286, 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who overturned the Taft Vale judgement?\nContext: One of the first acts of the new Liberal Government was to reverse the Taff Vale judgement.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570, 2.1939, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 3.0072, 2.8577, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.5627, 2.4351, 2.3113, 2.5560, 2.4345, 2.6726, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.4422, 2.6667, 2.8868, 2.7761, 2.9913, 2.8823, 2.7757, 2.6713,\n 2.5690, 2.7775, 2.9824, 2.8808, 2.7811, 2.9814, 2.8830, 3.0796, 3.2733,\n 3.4641, 3.6522, 3.8376, 4.0205, 4.2008, 4.1008, 4.2784, 4.4537, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.6715, 4.8375, 5.0017, 4.9058, 5.0679,\n 5.2281, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622, 5.8139, 5.7207,\n 5.6286, 5.7785, 5.6875, 5.5976, 5.5088, 5.4212, 5.3345, 5.4822, 5.3964,\n 5.5426, 5.6874, 5.8310, 5.9732, 6.1143, 6.0288, 5.9442, 6.0838, 6.2222,\n 6.3595, 6.4957, 6.6308, 6.7648, 6.6804, 6.5970, 6.5144, 6.4327, 6.3517,\n 6.2716, 6.4040, 6.5354, 6.6658, 6.7952, 6.9237, 6.8439, 6.7648, 6.8922,\n 6.8138, 6.7361, 6.6591, 6.5828, 6.5072, 6.6332, 6.5582, 6.6833, 6.8076,\n 6.9310, 7.0537, 6.9789, 7.1007, 7.2217, 7.1474, 7.0737, 7.0007, 7.1207,\n 7.2399, 7.3584, 7.4762, 7.4034, 7.3312, 7.4482, 7.3765, 7.3054, 7.2348,\n 7.3508, 7.4662, 7.3960, 7.5106, 7.6246, 7.5548, 7.4855, 7.4168, 7.5299,\n 7.6424, 7.5740, 7.5061, 7.4386, 7.5504, 7.6615, 7.7720, 7.8820, 7.8147,\n 7.7480, 7.8572, 7.7908, 7.7249, 7.6594, 7.7679, 7.8759, 7.8107, 7.9181,\n 8.0249, 7.9601, 7.8956, 7.8316, 7.9377, 8.0433, 7.9796, 7.9162, 7.8533,\n 7.9582, 8.0627, 8.1667, 8.2702, 8.2074, 8.1449, 8.2479, 8.1858, 8.1240,\n 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did settlements appear in Madhya Pradesh?\nContext: The first confirmed semipermanent settlements appeared 9,000 years ago in the Bhimbetka rock shelters in modern Madhya Pradesh, India.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.4071, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.8209, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.6678, 8.8007, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.9815, 8.8780, 9.0067, 8.9045, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.7635, 10.6733, 10.5841, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 10.9497, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.6219, 11.7217, 11.6412, 11.7405, 11.6606, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 11.9927, 12.0893, 12.0114,\n 11.9340, 12.0302, 11.9534, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many families were still without permanent homes?\nContext: He stated that 200,000 homes had been rebuilt, and 685,000 were under reconstruction, but 1.94 million households were still without permanent shelter.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.0178, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.0508, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.3904, 0.3404, 0.4845, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.8165, 0.9497, 1.0820, 1.0338, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.2049, 1.1587, 1.2839, 1.4084, 1.3620,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.4546, 1.5752, 1.5298, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.1172, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.5615, 12.4746, 12.3883, 12.3027, 12.2178, 12.3167, 12.2325,\n 12.1489, 12.0660, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.5024, 12.4223, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.1376, 13.0608, 12.9845, 12.9087, 13.0000,\n 12.9247, 12.8499, 12.7756, 12.7017, 12.7928, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the name of the airport the United States built on Ascension Island?\nContext: A local industry manufacturing fibre from New Zealand flax was successfully reestablished in 1907 and generated considerable income during the First World War.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -2.1172, -1.9545, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.0349, -2.0761, -1.9245,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.6112,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.6888, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.6823, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.8444, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.8304, 8.7439, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.3810, 9.4916, 9.6016, 9.5224, 9.6317, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.3154, 10.4170, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.9939, 11.0913, 11.0194, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.2872, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where are most of them from?\nContext: Additionally, there are around 60,000 non-Jewish African immigrants in Israel, some of whom have sought asylum.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "34.7%", + "z-score": "3.15", + "p value": "0.000812", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 1.2702, 1.5852, 1.4444,\n 1.7457, 1.6082, 1.4757, 1.3480, 1.2247, 1.1055, 0.9901, 1.2687, 1.5396,\n 1.4237, 1.6859, 1.9415, 2.1909, 2.4345, 2.3163, 2.5533, 2.7852, 2.6681,\n 2.8943, 2.7791, 3.0000, 3.2167, 3.1027, 2.9913, 2.8823, 2.7757, 2.6713,\n 2.5690, 2.7775, 2.9824, 3.1840, 3.0817, 3.2796, 3.4743, 3.3729, 3.5642,\n 3.4641, 3.6522, 3.8376, 3.7383, 3.6407, 3.5447, 3.4503, 3.6315, 3.5382,\n 3.4463, 3.3558, 3.5333, 3.7087, 3.8819, 3.7916, 3.9624, 4.1312, 4.0415,\n 4.2080, 4.1192, 4.2836, 4.4462, 4.3580, 4.2710, 4.1851, 4.3451, 4.2601,\n 4.4182, 4.3339, 4.4901, 4.6448, 4.7980, 4.7140, 4.8655, 5.0156, 4.9322,\n 5.0807, 4.9980, 5.1450, 5.0630, 4.9820, 4.9019, 4.8226, 4.7442, 4.6667,\n 4.8111, 4.7341, 4.6580, 4.5826, 4.5079, 4.4341, 4.3609, 4.2885, 4.2167,\n 4.1457, 4.0753, 4.0056, 3.9365, 3.8680, 3.8002, 3.7330, 3.6664, 3.6004,\n 3.5350, 3.4701, 3.6091, 3.5446, 3.6824, 3.6181, 3.5544, 3.4913, 3.4286,\n 3.3665, 3.3049, 3.2437, 3.1831, 3.1229, 3.0632, 3.0039, 2.9451, 2.8868,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.6014, 2.5456, 2.4902, 2.4351, 2.3805,\n 2.3262, 2.4578, 2.4037, 2.5343, 2.4803, 2.6099, 2.7386, 2.6846, 2.6309,\n 2.7585, 2.7050, 2.8316, 2.9575, 2.9040, 3.0290, 2.9756, 3.0997, 3.2230,\n 3.1696, 3.1166, 3.0638, 3.1860, 3.3075, 3.2547, 3.2023, 3.3228, 3.4427,\n 3.3902, 3.5093, 3.4570, 3.5753, 3.6929, 3.6407, 3.5887, 3.5370, 3.4857,\n 3.4346, 3.3838, 3.3333, 3.2831, 3.2332, 3.1836, 3.1342, 3.0851, 3.0363,\n 3.1514])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.6702, 8.5347, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.0254, 10.9229, 10.8215, 10.9355, 11.0488, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.1991, 11.3091, 11.4184, 11.3222, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.2503, 12.1622, 12.2628, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.5732, 12.6713, 12.7690, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.8037, 12.8997, 12.9952, 13.0903, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.0307, 13.1246, 13.0431, 12.9621, 13.0558,\n 13.1491, 13.0688, 13.1617, 13.0821, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.9221, 14.0106, 13.9332, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: According to Popper, the scientific selection process favors which type of theory?\nContext: Theories that say more about the way things appear are to be preferred over those that do not; the more generally applicable a theory is, the greater its value.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "23.9%", + "z-score": "-0.277", + "p value": "0.609", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.2222, -0.2765])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.4194, 7.6120, 7.3786, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.3557, 8.5206, 8.6828, 8.8426, 8.6667,\n 8.8252, 8.9815, 8.8121, 8.9672, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172, 10.0750, 10.2093, 10.0701,\n 9.9333, 9.7989, 9.9333, 9.8015, 9.9351, 10.0673, 9.9384, 9.8116,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.7348, 10.6232, 10.7429, 10.8616, 10.7518, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.4675, 11.3644, 11.4759, 11.5866, 11.4849, 11.3842,\n 11.4945, 11.3950, 11.2966, 11.1991, 11.1026, 11.2127, 11.3222, 11.2268,\n 11.1324, 11.0389, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.8151, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.2503, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.2034, 12.1184, 12.2178, 12.1335, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.0824, 12.0008, 11.9197, 11.8393, 11.9377,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.3428, 12.2638, 12.3595,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.3163, 12.4109, 12.5049, 12.4283,\n 12.5221, 12.4460, 12.5394, 12.6323, 12.7248, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.7756, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What kind of losses take place in transformers and inductors during conversion/transmission process?\nContext: Power conversion for a DC system takes place mainly in a railway substation where large, heavy, and more efficient hardware can be used as compared to an AC system where conversion takes place aboard the locomotive where space is limited and losses are significantly higher.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.0278, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.2998, 3.1918, 3.3947,\n 3.5942, 3.7905, 3.6831, 3.5777, 3.7700, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.8669, 4.7610, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.7242, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.7382, 5.8936, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.7890, 6.6973, 6.8364, 6.7456, 6.6559, 6.5672,\n 6.4795, 6.3928, 6.5303, 6.6667, 6.8019, 6.7159, 6.6308, 6.7648,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.4193, 7.5472, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.8808, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.6603,\n 8.7742, 8.6963, 8.8095, 8.7323, 8.8448, 8.9567, 8.8800, 8.9912,\n 8.9151, 8.8396, 8.7647, 8.6903, 8.6165, 8.7270, 8.8369, 8.9461,\n 8.8728, 8.8000, 8.9086, 9.0167, 9.1242, 9.2311, 9.3374, 9.4432,\n 9.5485, 9.4761, 9.5808, 9.6850, 9.6130, 9.7167, 9.8198, 9.9224,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.0848, 10.0143, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.3730, 10.3038, 10.2350, 10.3333,\n 10.4312, 10.5286, 10.4603, 10.5573, 10.4893, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was at one time Laemmle's personal secretary?\nContext: Thalberg had been Laemmle's personal secretary, and Laemmle was impressed by his cogent observations of how efficiently the studio could be operated.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.3333, 0.5298, 0.7237, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.2377, -2.2744, -2.1398, -2.0059, -2.0430,\n -2.0799, -2.1167, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.9628, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.6033, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.2143, 9.1252, 9.0370, 8.9496, 9.0680, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.0910, 10.0131, 9.9357, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.0231, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.6404, 10.5654, 10.6665, 10.7671, 10.8673, 10.9669,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.6217, 11.5489, 11.6441, 11.7389, 11.6667,\n 11.7611, 11.6893, 11.6179, 11.5470, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Larry McKinney explained that a two-month delay in drilling could do what?\nContext: Following the accident, a Fortune magazine contacted Larry McKinney, the executive director at the Harte Research Institute for Gulf of Mexico Studies at Texas A&M, and he explained that \"A two-month delay in the Arctic is not a two-month delay ...\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.6222, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.8805, 1.0096, 0.9629, 1.0911, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.3088, 9.2094, 9.3333, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.0472, 10.1621, 10.0698, 10.1840, 10.2975, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.3100, 12.2298, 12.3263, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 12.9540, 12.8771, 12.9691, 12.8928, 12.9845, 13.0758, 13.0000,\n 12.9247, 13.0157, 13.1063, 13.1966, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is KMC an initialism of?\nContext: Kathmandu Metropolitan City (KMC), in order to promote international relations has established an International Relations Secretariat (IRC).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "79", + "# Tokens in Greenlist": "13", + "Fraction of T in Greenlist": "16.5%", + "z-score": "-1.75", + "p value": "0.96", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 4.9316, 4.8038, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.0323, 8.9314, 9.0582,\n 8.9586, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 11.8287, 11.7395,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.0502, 13.9700, 13.8904, 13.9797, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.4382, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: After completing his studies, under whose rule did he become a regular canon?\nContext: As an adolescent, he had a particular love of theology and the Scriptures became the foundation of his spirituality.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.1437, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.5635, -1.3402, -1.4003, -1.4596, -1.2421, -1.0278, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.2188, -2.0735, -2.1128, -2.1520, -2.0083,\n -1.8656, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.8043, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "182", + "Fraction of T in Greenlist": "91.5%", + "z-score": "21.7", + "p value": "3e-104", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.8289, 11.9464, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.7435, 12.8540, 12.9636,\n 13.0725, 13.1806, 13.2879, 13.3945, 13.5004, 13.6056, 13.7100, 13.8138,\n 13.9169, 14.0193, 14.1211, 14.2222, 14.3227, 14.4226, 14.5219, 14.6206,\n 14.7187, 14.8162, 14.9132, 15.0096, 15.1054, 15.2007, 15.2955, 15.3898,\n 15.4835, 15.5767, 15.6694, 15.7617, 15.8534, 15.9447, 16.0355, 16.1258,\n 16.2157, 16.3051, 16.3941, 16.4826, 16.5707, 16.6584, 16.7457, 16.8325,\n 16.9189, 17.0050, 17.0906, 17.1758, 17.2607, 17.3452, 17.4292, 17.5130,\n 17.5963, 17.6793, 17.7619, 17.8442, 17.9261, 18.0077, 18.0889, 18.1698,\n 18.2503, 18.3305, 18.4104, 18.4900, 18.5693, 18.6482, 18.7268, 18.8051,\n 18.8832, 18.9609, 19.0383, 19.1154, 19.1922, 19.2688, 19.3450, 19.4210,\n 19.4967, 19.5721, 19.6472, 19.7221, 19.7967, 19.8710, 19.9451, 20.0189,\n 20.0925, 20.1658, 20.2388, 20.3116, 20.3842, 20.4565, 20.5286, 20.6004,\n 20.6720, 20.7434, 20.8145, 20.8854, 20.9560, 21.0265, 21.0967, 21.1667,\n 21.2364, 21.3060, 21.3753, 21.4444, 21.5133, 21.5820, 21.6505])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did the United States declare war on Japan?\nContext: On 8 December, the United States, the United Kingdom, Canada, and the Netherlands declared war on Japan, followed by China and Australia the next day.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "41.1%", + "z-score": "3.18", + "p value": "0.000747", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.7457, 2.0370, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.3094,\n 2.5064, 2.7005, 2.8919, 2.8006, 2.9887, 2.8983, 2.8093, 2.9938,\n 3.1760])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.3333,\n 2.5568, 2.4495, 2.3445, 2.5621, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.6765, 2.5775, 2.7811, 2.9814, 2.8830, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.8919, 2.8006, 2.7107, 2.8983, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.0924, 3.0071, 3.1829, 3.0984,\n 3.2717, 3.4429, 3.6122, 3.7796, 3.9452, 4.1090, 4.0234, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.5690, 5.7155, 5.8606, 6.0044, 6.1470, 6.0596, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.8605, 5.7778, 5.9171, 5.8351, 5.7540, 5.8919,\n 6.0287, 5.9481, 5.8684, 5.7894, 5.7112, 5.6338, 5.7689, 5.9029,\n 5.8260, 5.7498, 5.8825, 6.0143, 5.9386, 5.8635, 5.7892, 5.9196,\n 6.0491, 6.1777, 6.1036, 6.0302, 6.1577, 6.2843, 6.2113, 6.1389,\n 6.2644, 6.3892, 6.5130, 6.4409, 6.3694, 6.4923, 6.6144, 6.7358,\n 6.6645, 6.5939, 6.7143, 6.8339, 6.9529, 6.8825, 7.0006, 6.9307,\n 7.0481, 7.1647, 7.2807, 7.3960, 7.5106, 7.6246, 7.5548, 7.4855,\n 7.5988, 7.7114, 7.8233, 7.7544, 7.6859, 7.7971, 7.9078, 7.8397,\n 7.7720, 7.7048, 7.8147, 7.9241, 8.0328, 7.9659, 7.8995, 7.8335,\n 7.9415, 7.8759, 7.9833, 7.9181, 8.0249, 7.9601, 8.0663, 8.0018,\n 8.1075, 8.0433, 7.9796, 8.0847, 8.1892, 8.2933, 8.2298, 8.3333,\n 8.4364, 8.5390, 8.4757, 8.4128, 8.3503, 8.4523, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When was Adolf Hitlers first visit to the Alps?\nContext: Austrian-born Adolf Hitler had a lifelong romantic fascination with the Alps and by the 1930s established a home in the Obersalzberg region outside of Berchtesgaden.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "36.0%", + "z-score": "1.27", + "p value": "0.102", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.6418, 8.7758, 8.9086, 8.8007, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.6053,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.5714, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.5613, 11.6606, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.7200, 11.6425, 11.7401, 11.8373,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the port known as prior to the Swedish occupation of St. Barts?\nContext: Earlier to their occupation, the port was known as \"Car\u00e9nage\".\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.6030, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 1.0465, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.9272, 0.8805, 1.0096, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 1.0444, 1.0000,\n 1.1221, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "164", + "Fraction of T in Greenlist": "82.4%", + "z-score": "18.7", + "p value": "2.31e-78", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 6.7543, 6.9570, 7.1550, 7.3485,\n 7.1358, 7.3271, 7.5144, 7.6980, 7.8780, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.3742, 8.1882, 8.3557, 8.5206, 8.6828, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 10.2404, 10.3758, 10.5096, 10.6421, 10.7732, 10.9030, 10.7575, 10.8866,\n 11.0145, 11.1412, 11.2667, 11.1261, 11.2510, 11.3747, 11.4974, 11.6189,\n 11.7395, 11.8589, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.7435, 12.6170, 12.7279,\n 12.8380, 12.9473, 13.0558, 12.9326, 13.0408, 13.1482, 13.2549, 13.3609,\n 13.4661, 13.5707, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.7477,\n 13.8497, 13.9510, 14.0518, 14.1519, 14.2514, 14.3503, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.6141, 14.7103, 14.8059, 14.9011,\n 14.9957, 15.0898, 14.9817, 15.0756, 15.1690, 15.2619, 15.3543, 15.2483,\n 15.3405, 15.4323, 15.5236, 15.6144, 15.7048, 15.7948, 15.6911, 15.7809,\n 15.8702, 15.9591, 16.0476, 15.9459, 16.0342, 16.1220, 16.2095, 16.2966,\n 16.3833, 16.4696, 16.3700, 16.4561, 16.5418, 16.6272, 16.7122, 16.6143,\n 16.6991, 16.7835, 16.8676, 16.9514, 17.0348, 17.1178, 17.0218, 17.1047,\n 17.1873, 17.2695, 17.3514, 17.2568, 17.3386, 17.4200, 17.5011, 17.5818,\n 17.6623, 17.7424, 17.6497, 17.7297, 17.8094, 17.8888, 17.9678, 17.8764,\n 17.9554, 18.0340, 18.1124, 18.1905, 18.2683, 18.3458, 18.2559, 18.3333,\n 18.4105, 18.4873, 18.5639, 18.4752, 18.5517, 18.6278, 18.7038])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When was USB Battery Charging Specification Revision 1.2 released?\nContext: The USB Battery Charging Specification Revision 1.2 (released in 2010) makes clear that there are safety limits to the rated current at 5 A coming from USB 2.0.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.8889, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.2808, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.2060, 1.1514, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.7085, 1.6554, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.3926,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.6737, 1.6262, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.7310, 1.6843, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.6208, 1.7410, 1.6951, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660, 2.8868,\n 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284, 2.6558, 2.4910, 2.7778,\n 2.6186, 2.8947, 2.7406, 3.0072, 2.8577, 3.1156, 2.9704, 2.8301, 2.6943,\n 2.5627, 2.4351, 2.6811, 2.5560, 2.4345, 2.3163, 2.2011, 2.0889, 2.3238,\n 2.2133, 2.4422, 2.3333, 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428,\n 1.9462, 1.8516, 1.7589, 1.6678, 1.8791, 2.0870, 1.9959, 2.1997, 2.4004,\n 2.5981, 2.7928, 2.7005, 2.8919, 3.0806, 2.9887, 2.8983, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 3.1789, 3.3556, 3.2686, 3.1829, 3.0984, 3.0151,\n 2.9329, 3.1052, 3.2757, 3.1937, 3.1129, 3.0330, 3.2004, 3.1211, 3.2863,\n 3.2077, 3.3708, 3.2928, 3.4538, 3.3764, 3.2998, 3.2242, 3.1493, 3.0754,\n 3.2332, 3.1597, 3.0870, 3.0151, 2.9439, 2.8735, 3.0282, 2.9582, 3.1111,\n 3.0415, 2.9726, 2.9044, 3.0551, 2.9872, 2.9200, 2.8534, 2.7875, 2.7222,\n 2.6575, 2.5934, 2.7406, 2.8868, 2.8226, 2.9673, 3.1109, 3.2533, 3.3947,\n 3.3301, 3.4701, 3.6091, 3.5446, 3.4806, 3.4171, 3.5544, 3.4913, 3.6274,\n 3.5645, 3.6995, 3.8335, 3.7707, 3.7084, 3.6466, 3.5853, 3.5245, 3.6566,\n 3.7878, 3.7270, 3.6667, 3.6068, 3.7366, 3.6770, 3.8057, 3.7463, 3.8741,\n 3.8150, 3.9418, 3.8829, 3.8244, 3.7664, 3.7087, 3.6515, 3.7766, 3.7196,\n 3.6629, 3.6067, 3.5508, 3.4953, 3.6188, 3.5635, 3.6862, 3.6310, 3.5762,\n 3.5218, 3.6433, 3.5890, 3.5351, 3.4816, 3.4283, 3.3754, 3.3228, 3.2705,\n 3.3902, 3.5093, 3.4570, 3.5753, 3.6929, 3.8100, 3.9265, 3.8739, 3.9896,\n 4.1048, 4.0522, 4.0000, 3.9481, 4.0622, 4.0105, 4.1239, 4.0723, 4.1851,\n 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who did Yaroslav's sons marry?\nContext: Yaroslav the Wise, whose stepmother belonged to the Macedonian dynasty, the greatest one to rule Byzantium, married the only legitimate daughter of the king who Christianized Sweden.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "28.9%", + "z-score": "0.824", + "p value": "0.205", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.6537, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825, 3.6566,\n 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.8856, 2.1939, 2.4910, 2.3333,\n 2.6186, 2.8947, 3.1623, 3.4219, 3.2660, 3.5176, 3.3665, 3.6108, 3.8490,\n 4.0814, 4.3083, 4.1603, 4.0166, 4.2378, 4.4544, 4.6664, 4.8742, 5.0779,\n 5.2778, 5.1371, 5.0000, 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997,\n 5.6830, 5.5549, 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919,\n 6.0622, 5.9438, 5.8275, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132, 6.8641,\n 7.0133, 7.1611, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393, 7.2827, 7.4247,\n 7.5653, 7.7047, 7.6033, 7.5032, 7.6413, 7.5425, 7.6794, 7.8150, 7.9495,\n 8.0829, 8.2151, 8.1176, 8.0212, 8.1524, 8.2825, 8.4116, 8.3164, 8.4444,\n 8.5715, 8.6976, 8.6035, 8.5105, 8.6357, 8.5437, 8.4526, 8.3625, 8.4868,\n 8.6102, 8.7327, 8.6436, 8.5553, 8.6770, 8.5896, 8.5030, 8.4173, 8.5381,\n 8.6581, 8.7773, 8.6924, 8.6083, 8.7267, 8.6433, 8.5607, 8.4788, 8.5964,\n 8.7133, 8.8294, 8.7482, 8.6677, 8.7831, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.7681, 8.6921, 8.6166, 8.7284,\n 8.8396, 8.9502, 8.8752, 8.8008, 8.9107, 8.8369, 8.7636, 8.6908, 8.8000,\n 8.9086, 9.0167, 8.9444, 8.8726, 8.9800, 8.9087, 8.8379, 8.7676, 8.8744,\n 8.9806, 9.0863, 9.0164, 8.9469, 9.0520, 8.9830, 8.9145, 8.8464, 8.9509,\n 9.0549, 9.1584, 9.0906, 9.0233, 9.1262, 9.0593, 8.9929, 8.9268, 9.0292,\n 9.1310, 9.2324, 9.1667, 9.1013, 9.2022, 9.3026, 9.2376, 9.3375, 9.4370,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What were added to scoring in 2007?\nContext: The final score is calculated by taking deductions from the E score, and adding the result to the D score.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.6449, -1.6997, -1.4940, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -1.9545, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.7014, -2.7386,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -2.8887, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.7894, -2.8245, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.7990, -2.8333,\n -2.8675, -2.7358, -2.7701, -2.8043, -2.8383, -2.8721, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "66.5%", + "z-score": "13.5", + "p value": "1.52e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.1389, 5.9530, 5.7735, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.0849, 5.9479, 6.1283, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.1611, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.1111, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.8426, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.3043, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 11.8673,\n 11.7849, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.6802, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What palace was the place of creation for illustrated manuscripts?\nContext: In Topkapi Palace, these manuscripts were created by the artists working in Nakkashane, the atelier of the miniature and illumination artists.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.1342, -0.2000, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.8251, -1.6710, -1.5181, -1.5614, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.1825, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.6128, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.5518, -0.5927, -0.4644, -0.3369, -0.3780, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.2885, 8.1750, 8.0632, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.2351, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.7473, 9.6566, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.4185, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 10.1745, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.4652, 10.3835, 10.3024,\n 10.2220, 10.3284, 10.2486, 10.1695, 10.0910, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.0661, 10.9917, 10.9178, 10.8444, 10.9431, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where does most of Canada's asphalt end up these days?\nContext: The Canadian province of Alberta has most of the world's reserves of natural bitumen, in three huge deposits covering 142,000 square kilometres (55,000 sq mi), an area larger than England or New York state.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.0038, -1.0445,\n -1.0849, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.5378, 7.7232, 7.9048, 8.0829, 7.8780, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.8648, 9.0233, 9.1795, 9.0000,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 10.0820, 9.9392, 9.7989, 9.6612, 9.5258,\n 9.6630, 9.7989, 9.6667, 9.5366, 9.6719, 9.8058, 9.9384, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.5859,\n 10.4650, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.2427, 11.1392, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.7108, 11.6206, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.9341, 11.8503, 11.7672, 11.6847,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 11.8393, 11.7595,\n 11.8579, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.5221, 12.4460, 12.5394, 12.6323, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 12.9410, 12.8667, 12.7928, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What kind of books did housebuilders use?\nContext: Vernacular architecture became increasingly ornamental.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.2998, -2.0428, -2.1019, -2.1602,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -1.9127, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.0050, -1.0513, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.0069, -0.8601, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.5101, -0.3698, -0.4147, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.4327, -0.3021, -0.1721, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.0934, 7.9849, 7.8779,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.5448, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.5715, 8.6976, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.0389, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.1076, 12.2033, 12.2987, 12.2214, 12.3163, 12.4109, 12.3342, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which scientist noticed the relationship between the speed and distance of galaxies?\nContext: The observation by Edwin Hubble in 1929 that the speed at which galaxies recede positively correlates with their distance, led to the understanding that the universe is expanding, and the formulation of the Big Bang theory by Georges Lema\u00eetre.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.7559, 0.6888, 0.8716, 0.8047, 0.9847,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.8889, 0.8295, 0.9909, 0.9316, 0.8729,\n 1.0319, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.3943, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.2423, 1.1905, 1.1390, 1.2771, 1.4142, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.7158, 1.8490, 1.7970, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641, 3.7808, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 4.9507, 4.7819, 5.0037,\n 4.8407, 5.0576, 5.2697, 5.4772, 5.6805, 5.8797, 6.0751, 6.2668, 6.4550,\n 6.2993, 6.4846, 6.6667, 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 6.7568,\n 6.9286, 6.7893, 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 6.7648,\n 6.9282, 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296, 7.3773,\n 7.2684, 7.4146, 7.3073, 7.4521, 7.3464, 7.4897, 7.3855, 7.5275, 7.4247,\n 7.5653, 7.4639, 7.6033, 7.5032, 7.6413, 7.5425, 7.6794, 7.5818, 7.7174,\n 7.6210, 7.7555, 7.6603, 7.7937, 7.6995, 7.8318, 7.7387, 7.8699, 7.7778,\n 7.9079, 7.8168, 7.9460, 7.8558, 7.9839, 7.8948, 8.0219, 7.9336, 8.0598,\n 7.9724, 8.0976, 8.0111, 8.1354, 8.0497, 8.1731, 8.0882, 8.2107, 8.1266,\n 8.2483, 8.1650, 8.2858, 8.2032, 8.3231, 8.2413, 8.3605, 8.2793, 8.3977,\n 8.3172, 8.4348, 8.3550, 8.4718, 8.3927, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.5052, 8.6190, 8.5424, 8.6556, 8.5796, 8.6921, 8.6166, 8.7284,\n 8.6535, 8.7647, 8.6903, 8.8008, 8.7270, 8.8369, 8.7636, 8.8728, 8.8000,\n 8.9086, 8.8364, 8.9444, 8.8726, 8.9800, 8.9087, 9.0155, 8.9447, 9.0510,\n 8.9806, 9.0863, 9.0164, 9.1215, 9.0520, 9.1566, 9.0876, 9.1916, 9.1230,\n 9.2265, 9.1584, 9.2613, 9.1936, 9.2960, 9.2287, 9.3306, 9.2637, 9.3651,\n 9.2986, 9.3995, 9.3333, 9.4338, 9.3680, 9.4680, 9.4026, 9.5021, 9.4370,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is Kingsbridge Armory being turned into?\nContext: The Kingsbridge Armory, often cited as the largest armory in the world, is scheduled for redevelopment as the Kingsbridge National Ice Center.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.2151, -2.2629, -2.0732, -2.1213,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.0096, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.4099, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.0133, 6.9076, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.7927, 8.6976, 8.6035, 8.7287,\n 8.6357, 8.7600, 8.8833, 8.7913, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.7652, 8.6770, 8.7978, 8.7104, 8.8304, 8.7439, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.1151,\n 10.0353, 10.1423, 10.0631, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.2029, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.1106, 10.0371, 9.9642, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.6990, 10.6271, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.9480, 11.0450, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When are 192 samples taken instead of 576?\nContext: If there is a transient, 192 samples are taken instead of 576.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "23.3%", + "z-score": "-0.338", + "p value": "0.632", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 5.1962, 4.7469, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.7250, 8.8667, 9.0068, 9.1455, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.7897, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.4346, 9.5543, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.4524, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.3209, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.4164, 10.3333, 10.4407, 10.3583, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.8282, 10.7480, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.2816, 11.2028, 11.1245, 11.0468,\n 10.9697, 10.8931, 10.8170, 10.7415, 10.6665, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.8186, 10.7451, 10.6722, 10.5998, 10.5278, 10.6271, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which of Calatrava's creations contains an IMAX theater?\nContext: i les Ci\u00e8ncies), which contains an opera house/performing arts centre, a science museum, an IMAX cinema/planetarium, an oceanographic park and other structures such as a long covered walkway and restaurants.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.4907, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.5133, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.9127, 2.0913, 2.0158, 1.9413, 1.8677, 1.7951, 1.9695,\n 2.1420, 2.0692, 2.2393, 2.4077, 2.3349, 2.2629, 2.4286, 2.5927,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.4703, 2.6296, 2.5600, 2.7175,\n 2.8735, 2.8039, 2.7349, 2.6667, 2.5991, 2.5322, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.4227, 2.5731, 2.5087, 2.4449, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 1.9825, 2.1268, 2.0682, 2.0101, 1.9524, 2.0948, 2.0373, 2.1783,\n 2.1210, 2.2608, 2.2037, 2.1470, 2.2852, 2.4225, 2.3657, 2.5019,\n 2.4453, 2.3891, 2.3333, 2.2780, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.3262, 2.2723, 2.2188, 2.3500, 2.2966, 2.4267, 2.3735,\n 2.3206, 2.2680, 2.3967, 2.3443, 2.2923, 2.2406, 2.3679, 2.3163,\n 2.2650, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 2.1637, 2.1145, 2.2377, 2.3603, 2.3110, 2.2620, 2.3835,\n 2.3346, 2.4553, 2.4065, 2.3580, 2.3098, 2.4294, 2.3812, 2.5000,\n 2.6182, 2.7358, 2.6874, 2.6393, 2.5915, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "202", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "27.7%", + "z-score": "0.894", + "p value": "0.186", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 0.6794, 0.5556,\n 0.8729, 0.7505, 1.0541, 0.9333, 1.2247, 1.1055, 0.9901, 0.8783, 0.7698,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.6963, 0.6025,\n 0.8513, 0.7579, 1.0000, 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857,\n 0.7006, 0.6172, 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637,\n 0.2887, 0.2148, 0.4264, 0.6348, 0.5601, 0.7646, 0.9661, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328, 1.2189,\n 1.1476, 1.3308, 1.2599, 1.4403, 1.3697, 1.2999, 1.2309, 1.1628, 1.0954,\n 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428, 0.8793, 1.0498, 0.9864,\n 1.1547, 1.0915, 1.0290, 0.9671, 0.9058, 1.0705, 1.0094, 0.9488, 0.8889,\n 0.8295, 0.7707, 0.7124, 0.8729, 0.8147, 0.9733, 0.9152, 1.0721, 1.0141,\n 0.9567, 0.8997, 1.0541, 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201,\n 0.6660, 0.6124, 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.6983, 0.8447,\n 0.7921, 0.9372, 1.0812, 1.2243, 1.3663, 1.3128, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.2771, 1.2257, 1.3625, 1.3112, 1.4470,\n 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954, 1.0465, 0.9979,\n 0.9497, 1.0820, 1.0338, 1.1651, 1.1169, 1.2472, 1.1991, 1.1513, 1.1038,\n 1.0565, 1.1852, 1.1380, 1.0911, 1.0445, 0.9981, 0.9520, 0.9062, 1.0328,\n 0.9870, 1.1127, 1.0670, 1.1918, 1.1461, 1.1007, 1.0555, 1.1790, 1.1339,\n 1.0890, 1.0444, 1.0000, 0.9558, 0.9119, 0.8682, 0.8248, 0.9461, 0.9027,\n 0.8595, 0.8165, 0.7737, 0.8937])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is Seattle's average December temperature?\nContext: Winters are cool and wet with December, the coolest month, averaging 40.6 \u00b0F (4.8 \u00b0C), with 28 annual days with lows that reach the freezing mark, and 2.0 days where the temperature stays at or below freezing all day; the temperature rarely lowers to 20 \u00b0F (\u22127 \u00b0C).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 0.6794, 1.0000,\n 1.3093, 1.1793, 1.0541, 1.3480, 1.6330, 1.5076, 1.3862, 1.2687, 1.5396,\n 1.8034, 1.6859, 1.5717, 1.4606, 1.7132, 1.6036, 1.8489, 2.0889, 1.9795,\n 1.8728, 2.1054, 2.0000, 1.8970, 1.7963, 1.6977, 1.6013, 1.8240, 1.7285,\n 1.6348, 1.8516, 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 2.1094,\n 2.0207, 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.7942, 1.9870, 2.1773,\n 2.3651, 2.2819, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.3238, 2.2453,\n 2.1678, 2.3448, 2.5198, 2.4423, 2.3658, 2.2902, 2.2156, 2.1420, 2.3126,\n 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.1213, 2.2862, 2.4495, 2.3791,\n 2.3094, 2.2405, 2.4010, 2.5600, 2.4910, 2.6481, 2.5796, 2.5117, 2.4444,\n 2.5991, 2.7524, 2.6852, 2.6186, 2.5527, 2.7037, 2.6381, 2.5731, 2.5087,\n 2.4449, 2.3817, 2.5298, 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.3660,\n 2.3054, 2.2454, 2.3891, 2.5318, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348,\n 2.5744, 2.5156, 2.4574, 2.3995, 2.3422, 2.2852, 2.2287, 2.3657, 2.5019,\n 2.6370, 2.7713, 2.7143, 2.8475, 2.7906, 2.7341, 2.6781, 2.6224, 2.5672,\n 2.6984, 2.6433, 2.7735, 2.7186, 2.8478, 2.9761, 2.9212, 2.8666, 2.8124,\n 2.7585, 2.7050, 2.6519, 2.5990, 2.5466, 2.4944, 2.4426, 2.3912, 2.3400,\n 2.4653, 2.4142, 2.3635, 2.4877, 2.4371, 2.3868, 2.5099, 2.6323, 2.5820,\n 2.5319, 2.4822, 2.6034, 2.5538, 2.5044, 2.6247, 2.5754, 2.5265, 2.4778,\n 2.5969, 2.7154, 2.6667, 2.6182, 2.5700, 2.6874, 2.6393, 2.5915, 2.5439,\n 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.2549, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.7006, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.5258, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.6466, 7.5556, 7.4655, 7.5967, 7.5076, 7.6376,\n 7.7667, 7.8948, 7.8065, 7.7192, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.2107, 8.1266, 8.2483, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.0403, 7.9600, 7.8803, 8.0002, 8.1192,\n 8.0402, 7.9619, 7.8842, 8.0024, 7.9253, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.4664, 8.3910, 8.3162, 8.4293,\n 8.3550, 8.2813, 8.2082, 8.1356, 8.2479, 8.1758, 8.2874, 8.3984,\n 8.5088, 8.4371, 8.3660, 8.4757, 8.4050, 8.3349, 8.2652, 8.1960,\n 8.1273, 8.2362, 8.1679, 8.1001, 8.2084, 8.1410, 8.0741, 8.1817,\n 8.2887, 8.2221, 8.3286, 8.4345, 8.3683, 8.3024, 8.4078, 8.5126,\n 8.6169, 8.7207, 8.8240, 8.9268, 8.8612, 8.9635, 9.0653, 9.1667,\n 9.2676, 9.3680, 9.4680, 9.5675, 9.5021, 9.4370, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Bell learned to accurately read lips even without knowing what?\nContext: In this treatise, his father explains his methods of how to instruct deaf-mutes (as they were then known) to articulate words and read other people's lip movements to decipher meaning.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "19.3%", + "z-score": "-1.2", + "p value": "0.886", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.1196, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 10.1745, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 10.8515, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.0004, 11.1018, 11.2028, 11.3032, 11.2250,\n 11.3249, 11.4244, 11.5234, 11.6220, 11.5444, 11.6425, 11.5655, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.9586, 11.8849, 11.8117, 11.9060, 12.0000,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is Oklahoma's largest school district?\nContext: Oklahoma City is home to the state's largest school district, Oklahoma City Public Schools.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.3%", + "z-score": "0.41", + "p value": "0.341", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.8704,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.4525, 0.4103])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 5.9132, 6.0883, 5.9628, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.5514, 6.4501, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.0219, 7.9336, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.7267, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.5516, 8.6677, 8.7831, 8.8978, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.0231, 9.9481, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.3439, 10.4427, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is a name for the reduced complement of genetic material necessary for an organism to live?\nContext: There is experimental work being done on minimal genomes for single cell organisms as well as minimal genomes for multi-cellular organisms (see Developmental biology).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.8131, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.1202, 1.3206, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.6524, 1.8257, 1.7552, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.9829, 1.9149, 2.0785, 2.2405, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 2.0918, 2.0276, 2.1822,\n 2.3354, 2.2711, 2.4227, 2.3586, 2.5087, 2.4449, 2.3817, 2.3190,\n 2.4669, 2.4045, 2.5508, 2.6961, 2.6336, 2.7775, 2.7153, 2.6536,\n 2.5925, 2.5318, 2.4717, 2.4121, 2.5532, 2.4938, 2.6336, 2.7724,\n 2.7129, 2.8505, 2.7913, 2.9277, 2.8687, 2.8101, 2.7520, 2.8868,\n 2.8288, 2.9625, 3.0952, 3.0373, 2.9798, 2.9227, 2.8660, 2.9971,\n 2.9406, 2.8845, 2.8288, 2.7735, 2.7186, 2.6640, 2.6099, 2.7386,\n 2.8666, 2.9938, 2.9394, 2.8853, 2.8316, 2.7783, 2.9040, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.8174, 2.7654, 2.7137, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.5319, 2.6534, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.6458, 2.7644, 2.7154, 2.8333,\n 2.7844, 2.7358, 2.6874, 2.6393, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.1343, 9.0323, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.2514, 10.3630, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.3209, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.7604, 11.6840, 11.7808, 11.7050, 11.6297, 11.7261, 11.6514, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what year was the Mananga Management Centre founded?\nContext: The Mananga management centre was established as Mananga Agricultural Management Centre in 1972 as an International Management Development Centre catering for middle and senior managers, it is located at Ezulwini.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "184", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "22.3%", + "z-score": "-0.851", + "p value": "0.803", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.5345, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.6128, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.8620, 6.5997,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.5354, 6.7402, 6.9402,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.3030,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.1654, 8.3267, 8.4857, 8.6424, 8.4856, 8.6410,\n 8.7943, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.0701,\n 10.2036, 10.0673, 10.2000, 10.3314, 10.1983, 10.3288, 10.4581, 10.3280,\n 10.1999, 10.3287, 10.2030, 10.3310, 10.2075, 10.0857, 9.9656, 10.0935,\n 10.2202, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.3289, 12.2221, 12.1164, 12.2244, 12.1200, 12.2275,\n 12.3343, 12.2314, 12.3377, 12.4434, 12.3419, 12.4471, 12.3468, 12.2474,\n 12.3524, 12.2541, 12.1568, 12.0605, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.6800, 12.7812, 12.6867, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.8017, 12.9011, 13.0000, 12.9085, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 13.9690, 13.8804, 13.9735, 13.8857, 13.9784, 14.0707, 13.9838, 14.0758,\n 14.1673, 14.0813, 13.9959, 14.0872, 14.0025, 14.0936, 14.0096, 13.9262,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What rank provided its holder territorial rule?\nContext: Each successive rank gave its holder greater pensions and legal privileges.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "24.4%", + "z-score": "-0.183", + "p value": "0.572", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "158", + "Fraction of T in Greenlist": "79.4%", + "z-score": "17.7", + "p value": "1.43e-70", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.7636,\n 8.9265, 8.7305, 8.8926, 9.0520, 9.2091, 9.3638, 9.5163, 9.6667,\n 9.8150, 9.9613, 10.1057, 10.2482, 10.0718, 10.2138, 10.3540, 10.1840,\n 10.0178, 10.1585, 10.2976, 10.1368, 10.2752, 10.4119, 10.5472, 10.6810,\n 10.5269, 10.6600, 10.7918, 10.9222, 11.0513, 11.1791, 11.3056, 11.4310,\n 11.5551, 11.6781, 11.8000, 11.6559, 11.7773, 11.8977, 11.7572, 11.6189,\n 11.7395, 11.8589, 11.7239, 11.8429, 11.9609, 12.0779, 12.1940, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.7435, 12.8540, 12.9636,\n 13.0725, 13.1806, 13.0558, 13.1636, 13.2706, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.1221, 13.2280, 13.3333, 13.4379, 13.5419, 13.4259, 13.5295,\n 13.6324, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.3222, 14.4200, 14.5173, 14.4075, 14.2988, 14.3961, 14.4928,\n 14.3857, 14.4822, 14.5781, 14.6736, 14.7685, 14.6634, 14.7580, 14.8522,\n 14.9459, 15.0391, 15.1318, 15.2240, 15.3158, 15.4072, 15.4980, 15.5885,\n 15.4867, 15.5769, 15.6667, 15.5662, 15.4666, 15.5563, 15.6457, 15.5473,\n 15.6365, 15.7252, 15.8135, 15.9014, 15.8046, 15.8923, 15.9796, 16.0665,\n 16.1531, 16.2392, 16.3250, 16.4104, 16.4954, 16.5801, 16.6644, 16.5702,\n 16.6543, 16.7381, 16.6450, 16.5525, 16.6363, 16.7197, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 16.8690, 16.9511, 17.0328, 17.1143, 17.1954,\n 17.2762, 17.3567, 17.4369, 17.5168, 17.5963, 17.6756, 17.5875, 17.6667,\n 17.7455, 17.6583, 17.5716, 17.6504, 17.7290, 17.6431, 17.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What happened to Hornswoggle?\nContext: Dave Finlay was often aided in his matches by a midget known mainly as Hornswoggle while in WWE, who hid under the ring and gave a shillelagh to Finlay to use on his opponent.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "17.5%", + "z-score": "-2.19", + "p value": "0.986", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.6997, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -1.7988, -1.8475, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.1470, -2.1880, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.3409, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.3812, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 10.8699,\n 10.9870, 11.1033, 10.9955, 11.1111, 11.2259, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.8982, 12.0077, 12.1164, 12.0118, 11.9083, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.2541, 12.3586, 12.4625, 12.5657, 12.6684, 12.5717, 12.6739,\n 12.7755, 12.6800, 12.7812, 12.8819, 12.9820, 12.8877, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.4868,\n 13.5827, 13.6781, 13.5876, 13.6826, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.3380, 14.4292, 14.5199, 14.4321,\n 14.3449, 14.2584, 14.1725, 14.2633, 14.1781, 14.0936, 14.0096, 14.1003,\n 14.1906, 14.1074, 14.1974, 14.2870, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.5659, 14.6534, 14.7406, 14.6599, 14.7468, 14.8333,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What percentage of Mexico City's population was indigenous in 1921?\nContext: In 1921, Mexico City had less than one million inhabitants.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.1348, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.5740, 0.5283, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.4145, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.6758, 9.7890, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.8702, 9.7869, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.0904, 11.1886, 11.2864, 11.2126, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.5489, 11.6441, 11.7389, 11.6667,\n 11.7611, 11.8551, 11.7833, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what year did a French magazine describe the use of asphalt?\nContext: One hundred years after the fall of Constantinople in 1453, Pierre Belon described in his work Observations in 1553 that pissasphalto, a mixture of pitch and bitumen, was used in Dubrovnik for tarring of ships from where it was exported to a market place in Venice where it could be bought by anyone.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "18.8%", + "z-score": "-1.83", + "p value": "0.966", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.0548, -1.8843, -1.9298, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.5717, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.6681, 2.5538, 2.4422, 2.6667,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.6713, 2.5690, 2.7775,\n 2.6765, 2.5775, 2.4804, 2.3851, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.7005, 2.8919, 3.0806, 3.2667, 3.4503, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.5333, 3.7087, 3.6187, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.6979, 3.6122, 3.7796, 3.6947, 3.8600, 3.7758, 3.6927,\n 3.8555, 4.0166, 4.1761, 4.3339, 4.4901, 4.6448, 4.7980, 4.7140,\n 4.6311, 4.7823, 4.9322, 5.0807, 5.2278, 5.3736, 5.2906, 5.2085,\n 5.3526, 5.4956, 5.6373, 5.7778, 5.9171, 5.8351, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.5653, 6.6968, 6.8274, 6.9570,\n 7.0857, 7.2134, 7.3402, 7.4661, 7.5910, 7.7152, 7.8384, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.56, + "accuracy_with_watermark": 0.54, + "f1_without_watermark": 0.5280995280995281, + "f1_with_watermark": 0.4875222816399287 + } + }, + "validation": { + "results": [ + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What came into force after the new constitution was herald?\nContext: As of that day, the new constitution heralding the Second Republic came into force.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.8520, 1.0289, 0.9631, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.5547, -0.5990, -0.4593, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.7074, 10.6145,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.5397, 10.4537, 10.5621, 10.4769, 10.3923,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.7217, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.8464, 12.7688,\n 12.8616, 12.9540, 13.0460, 12.9691, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the first major city in the stream of the Rhine?\nContext: The most important tributaries in this area are the Ill below of Strasbourg, the Neckar in Mannheim and the Main across from Mainz.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.5010, -1.5479, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.7270, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.0059, -2.0430,\n -2.0799, -1.9473, -1.9843, -1.8527, -1.8898, -1.9267, -1.9635, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.0623, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.0928, 5.9874, 5.8835, 6.0410, 5.9386,\n 5.8377, 5.9932, 5.8936, 5.7955, 5.6986, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 5.7785, 5.9270, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.3248, 6.4663, 6.3768, 6.2883, 6.4283, 6.5672,\n 6.4795, 6.3928, 6.3070, 6.4444, 6.3595, 6.4957, 6.6308, 6.5465,\n 6.6804, 6.8133, 6.9451, 7.0759, 6.9923, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.0043, 7.1319, 7.0513, 7.1779, 7.3037, 7.4286, 7.5526,\n 7.4724, 7.5955, 7.7178, 7.6383, 7.5595, 7.6808, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.0801, 8.1976, 8.1198, 8.0427, 7.9663, 7.8905,\n 8.0070, 7.9318, 7.8571, 7.9729, 8.0880, 8.0139, 8.1282, 8.2420,\n 8.3550, 8.2813, 8.3937, 8.5054, 8.6165, 8.5433, 8.6537, 8.5810,\n 8.6908, 8.8000, 8.9086, 9.0167, 8.9444, 9.0518, 9.1587, 9.0869,\n 9.0155, 8.9447, 9.0510, 9.1567, 9.2619, 9.1915, 9.2961, 9.4002,\n 9.3302, 9.4338, 9.5369, 9.6394, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.2050, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.4603, 10.5573, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the minimum required if you want to teach in Canada?\nContext: In most provinces a second Bachelor's Degree such as a Bachelor of Education is required to become a qualified teacher.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "178", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "40.4%", + "z-score": "4.76", + "p value": "9.67e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.2418, 2.4585, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.9814, 2.8830, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.7005, 2.6098, 2.5205, 2.7107, 2.6222, 2.8093, 2.7217,\n 2.9057, 3.0873, 3.0000, 3.1789, 3.3556, 3.5301, 3.7025, 3.6148,\n 3.5283, 3.6979, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.6919, 3.8523, 3.7717, 3.9302, 4.0872, 4.0069,\n 4.1621, 4.0825, 4.0038, 3.9260, 3.8490, 3.7730, 3.6977, 3.8497,\n 4.0004, 3.9254, 4.0745, 4.2222, 4.3687, 4.5140, 4.4388, 4.3644,\n 4.2907, 4.4341, 4.3609, 4.5029, 4.4302, 4.3583, 4.4987, 4.4272,\n 4.5663, 4.4953, 4.6332, 4.7700, 4.6992, 4.6291, 4.7645, 4.6949,\n 4.6258, 4.5573, 4.4895, 4.6232, 4.5557, 4.6883, 4.6212, 4.5547,\n 4.4887, 4.4233, 4.3583, 4.4891, 4.4246, 4.3605, 4.4901, 4.4264,\n 4.3631, 4.4915, 4.4286, 4.3661, 4.4933, 4.4312, 4.5575, 4.6829,\n 4.8076, 4.7455, 4.8693, 4.8074, 4.7460, 4.6850, 4.8076, 4.7469,\n 4.8687, 4.8083, 4.7483, 4.6887, 4.6295, 4.5707, 4.6911, 4.6325,\n 4.5744, 4.6938, 4.6359, 4.5783, 4.6968, 4.6395, 4.5826, 4.7001,\n 4.8170, 4.7602])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.0171, 7.9115, 8.0495, 7.9455, 7.8428, 7.9796, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 7.8889, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.4778, 9.5938,\n 9.5057, 9.4185, 9.5338, 9.4474, 9.5620, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.3686, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.9524, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.4341, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.6397, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.6665, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.4533, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How was Tem\u00fcjin kept imprisoned by the Tayichi'ud?\nContext: The Tayichi'ud enslaved Tem\u00fcjin (reportedly with a cangue, a sort of portable stocks), but with the help of a sympathetic guard, the father of Chilaun (who later became a general of Genghis Khan), he was able to escape from the ger (yurt) in the middle of the night by hiding in a river crevice.[citation needed]\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.2520, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.3365, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.4082,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.4714, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.5283, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.5803, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 4.2426, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.8669, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.7242, 5.8835, 5.7812, 5.9386,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.2075, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.1470, 6.2883, 6.4283, 6.3408,\n 6.4795, 6.6171, 6.7536, 6.6667, 6.5807, 6.7159, 6.8500, 6.9830,\n 6.8977, 6.8133, 6.9451, 6.8615, 6.9923, 7.1220, 7.0391, 7.1678,\n 7.2956, 7.2134, 7.1319, 7.2587, 7.3845, 7.3037, 7.2236, 7.1443,\n 7.0658, 7.1904, 7.1125, 7.2363, 7.1590, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.6883, 7.8072, 7.9253, 7.8489, 7.9663, 7.8905,\n 8.0070, 8.1229, 8.2381, 8.3526, 8.4664, 8.5796, 8.5041, 8.4293,\n 8.5417, 8.4674, 8.5792, 8.5054, 8.6165, 8.7270, 8.6537, 8.5810,\n 8.5088, 8.4371, 8.5469, 8.6560, 8.7646, 8.8726, 8.8013, 8.9087,\n 9.0155, 8.9447, 8.8744, 8.8045, 8.7351, 8.8413, 8.9469, 8.8780,\n 8.8094, 8.7414, 8.8464, 8.7788, 8.8832, 8.9872, 9.0906, 9.1936,\n 9.1262, 9.0593, 8.9929, 9.0952, 9.1971, 9.2986, 9.3995, 9.3333,\n 9.4338, 9.5338, 9.4680, 9.4026, 9.3375, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did Herr Gott, dich loben wir become known as ?\nContext: He paraphrased the Te Deum as \"Herr Gott, dich loben wir\" with a simplified form of the melody.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.9169, 1.1202, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.4517, 1.3943, 1.5430, 1.4857, 1.6330,\n 1.7792, 1.9242, 1.8664, 1.8091, 1.9524, 2.0948, 2.2361, 2.1783,\n 2.3183, 2.2608, 2.2037, 2.3422, 2.2852, 2.2287, 2.3657, 2.5019,\n 2.6370, 2.5802, 2.7143, 2.6576, 2.6014, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.3262, 2.2723, 2.4037, 2.3500, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.4495, 2.3967, 2.3443, 2.4721, 2.4198, 2.5466, 2.4944,\n 2.6203, 2.7454, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.4597, 2.5820, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.1418, 2.2618, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.1444, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "157", + "Fraction of T in Greenlist": "78.9%", + "z-score": "17.6", + "p value": "2.59e-69", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 8.8121, 8.6469, 8.8029, 8.9567, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.6838, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.4834, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 10.9777, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.6890, 11.5630, 11.6809, 11.7978, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.1346, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.5723, 12.6815,\n 12.7900, 12.8978, 12.7815, 12.6667, 12.7743, 12.8813, 12.9875, 13.0931,\n 13.1979, 13.3022, 13.1904, 13.0798, 13.1839, 13.2873, 13.3902, 13.4924,\n 13.5940, 13.6950, 13.7953, 13.8952, 13.7878, 13.6816, 13.7813, 13.8804,\n 13.9790, 14.0771, 14.1746, 14.2715, 14.3680, 14.4639, 14.3605, 14.2581,\n 14.3540, 14.4493, 14.5442, 14.6385, 14.7324, 14.8257, 14.9187, 15.0111,\n 14.9113, 14.8124, 14.9048, 14.9967, 15.0882, 15.1792, 15.2698, 15.3600,\n 15.2631, 15.1669, 15.2570, 15.3467, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.7823, 15.6891, 15.7763, 15.8631, 15.9496, 16.0357,\n 16.1214, 16.2068, 16.1151, 16.0242, 16.1095, 16.1945, 16.2791, 16.3633,\n 16.4472, 16.5308, 16.6140, 16.6969, 16.6078, 16.5193, 16.6021, 16.6846,\n 16.7668, 16.8487, 16.9302, 17.0115, 17.0924, 17.1730, 17.0862, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.3205, 17.3999, 17.4790, 17.5578])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What year did the the case go before the supreme court?\nContext: For example, Joseph Haas was arrested for allegedly sending an email to the Lebanon, New Hampshire city councilors stating, \"Wise up or die.\"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 5.0602, 4.9377,\n 4.8177, 4.7002, 4.5850, 4.4721, 4.3614, 4.2528, 4.1461, 4.0415,\n 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 3.7264, 3.6315, 3.8103,\n 3.7166, 3.6242, 3.5333, 3.7087, 3.6187, 3.5301, 3.4427, 3.3566,\n 3.2717, 3.1879, 3.1052, 3.0237, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.6316, 2.5560, 2.4814, 2.6485, 2.5743, 2.5011, 2.6656, 2.5927,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.2405, 2.1723, 2.1049, 2.0381,\n 1.9720, 1.9066, 1.8419, 2.0000, 1.9355, 1.8716, 2.0276, 1.9640,\n 1.9009, 2.0548, 1.9920, 1.9298, 2.0817, 2.2323, 2.1700, 2.1082,\n 2.0470, 1.9863, 1.9261, 1.8665, 1.8074, 1.7488, 1.8956, 2.0412,\n 1.9825, 1.9242, 2.0682, 2.0101, 1.9524, 2.0948, 2.0373, 1.9803,\n 2.1210, 2.2608, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 1.9540, 1.9013, 2.0339, 1.9813, 1.9291, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.8058, 1.7592, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 2.0370, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.0410, 4.9348, 5.1065, 5.0019, 4.8990,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.4610, 5.6220, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.3867, 5.2915, 5.1977, 5.1051, 5.2614, 5.1698,\n 5.3243, 5.4772, 5.6286, 5.5377, 5.4480, 5.5976, 5.5088, 5.6569,\n 5.8035, 5.7155, 5.8606, 6.0044, 6.1470, 6.0596, 6.2008, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.0000, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.4632, 6.3807, 6.2991, 6.2183, 6.3517, 6.2716, 6.1923, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.6365, 6.7648, 6.6865, 6.6089, 6.5320,\n 6.6591, 6.5828, 6.7090, 6.8343, 6.9587, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.3721, 7.2966, 7.2217, 7.1474, 7.0737, 7.0007, 7.1207,\n 7.2399, 7.3584, 7.4762, 7.5933, 7.5204, 7.4482, 7.3765, 7.3054,\n 7.4215, 7.5369, 7.6517, 7.7658, 7.8793, 7.9921, 7.9211, 8.0333,\n 7.9628, 7.8928, 8.0042, 8.1150, 8.0455, 7.9764, 8.0865, 8.1960,\n 8.3050, 8.2362, 8.1679, 8.2762, 8.2084, 8.1410, 8.2486, 8.1817,\n 8.1151, 8.0490, 8.1560, 8.0902, 8.0249, 8.1312, 8.2370, 8.3423,\n 8.2773, 8.3820, 8.4862, 8.4215, 8.5252, 8.6284, 8.7311, 8.6667,\n 8.6026, 8.7048, 8.6411, 8.5778, 8.6794, 8.7805, 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What does UMC stand for?\nContext: Founded in 1968 by the union of the Methodist Church (USA) and the Evangelical United Brethren Church, the UMC traces its roots back to the revival movement of John and Charles Wesley in England as well as the Great Awakening in the United States.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.4140, -2.4618,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.7272, -2.5403, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.3552, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.5318, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.4975, -2.3422, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.1576, -2.1980, -2.0476, -2.0881, -2.1284, -1.9799, -2.0203, -2.0605,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.8033, -1.6630, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "69.9%", + "z-score": "8.85", + "p value": "4.29e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.6817, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the name of the professional skateboarder that lives in southern California?\nContext: Southern California is also important to the world of yachting.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142, 1.2702, 1.1323, 1.0000,\n 1.3093, 1.1793, 1.4757, 1.7628, 2.0412, 1.9096, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.5560, 2.4345, 2.3163, 2.5533, 2.4371, 2.3238,\n 2.5538, 2.4422, 2.3333, 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.5690, 2.7775, 2.6765, 2.5775, 2.7811, 2.6833, 2.5873, 2.7863, 2.6914,\n 2.8868, 3.0793, 2.9848, 2.8919, 2.8006, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.5660, 2.7456, 2.6632, 2.8402, 3.0151,\n 2.9329, 2.8518, 2.7717, 2.9433, 3.1129, 3.0330, 2.9542, 2.8764, 3.0429,\n 2.9656, 2.8893, 2.8138, 2.7393, 2.6656, 2.5927, 2.5207, 2.4495, 2.3791,\n 2.3094, 2.4703, 2.4010, 2.5600, 2.4910, 2.4228, 2.5796, 2.5117, 2.4444,\n 2.5991, 2.5322, 2.4660, 2.4004, 2.3354, 2.2711, 2.2074, 2.1442, 2.0817,\n 2.0197, 2.1700, 2.1082, 2.0470, 1.9863, 2.1344, 2.0739, 2.0140, 1.9545,\n 1.8956, 1.8371, 1.9825, 2.1268, 2.2699, 2.2111, 2.1527, 2.0948, 2.2361,\n 2.1783, 2.1210, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.1980, 2.3333, 2.2780, 2.2230, 2.1685, 2.3022, 2.2478, 2.1938,\n 2.1401, 2.0868, 2.0339, 1.9813, 1.9291, 2.0604, 2.0083, 2.1386, 2.0866,\n 2.0350, 1.9837, 2.1125, 2.0613, 2.0105, 2.1381, 2.0873, 2.0369, 2.1634,\n 2.2892, 2.4142, 2.3635, 2.3131, 2.2630, 2.3868, 2.3368, 2.2871, 2.4099,\n 2.3603, 2.3110, 2.2620, 2.3835, 2.3346, 2.2860, 2.2377, 2.3580, 2.4778,\n 2.4294, 2.3812, 2.3333, 2.4520, 2.4042, 2.3567, 2.3094, 2.2624, 2.2156,\n 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.7457, 2.0370, 2.3190, 2.5924, 2.4495, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.5627, 2.4351, 2.3113, 2.1909, 2.4345, 2.3163, 2.2011, 2.4371, 2.6681,\n 2.8943, 3.1160, 3.0000, 3.2167, 3.1027, 2.9913, 2.8823, 3.0929, 2.9856,\n 2.8804, 2.7775, 2.9824, 2.8808, 2.7811, 2.9814, 3.1787, 3.3729, 3.5642,\n 3.4641, 3.6522, 3.5533, 3.4562, 3.3607, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.4463, 3.3558, 3.2667, 3.4438, 3.6187, 3.7916, 3.9624, 3.8730, 4.0415,\n 4.2080, 4.1192, 4.0316, 3.9452, 4.1090, 4.0234, 4.1851, 4.3451, 4.5035,\n 4.6603, 4.8154, 4.7296, 4.8830, 4.7980, 4.7140, 4.8655, 4.7823, 4.9322,\n 4.8497, 4.7682, 4.6876, 4.8355, 4.7556, 4.9019, 5.0469, 5.1908, 5.3333,\n 5.4747, 5.3947, 5.5348, 5.6737, 5.8114, 5.9481, 5.8684, 6.0038, 5.9247,\n 5.8464, 5.7689, 5.9029, 5.8260, 5.9589, 6.0908, 6.2217, 6.3517, 6.4807,\n 6.4039, 6.5320, 6.6591, 6.7854, 6.9107, 6.8343, 6.9587, 6.8828, 6.8076,\n 6.7330, 6.8564, 6.7823, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673, 7.7949,\n 7.7230, 7.8372, 7.7658, 7.6950, 7.6246, 7.5548, 7.6681, 7.5988, 7.7114,\n 7.8233, 7.9347, 8.0455, 8.1556, 8.0865, 8.1960, 8.1273, 8.0591, 8.1679,\n 8.1001, 8.2084, 8.1410, 8.0741, 8.0076, 8.1151, 8.0490, 7.9833, 7.9181,\n 7.8533, 7.9601, 7.8956, 8.0018, 8.1075, 8.2127, 8.3173, 8.4215, 8.3572,\n 8.4608, 8.5640, 8.6667, 8.7689, 8.7048, 8.8065, 8.7427, 8.6794, 8.6164,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where did Tem\u00fcjin hide during his escape from the Tayichi'ud?\nContext: Tem\u00fcjin's reputation also became widespread after his escape from the Tayichi'ud.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -1.8856,\n -1.9630, -1.5852, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.1677, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.0596, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.1380, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.5715, 8.6976, 8.8227, 8.7287,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.0060, 8.9178, 8.8304, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.0076, 9.9278,\n 9.8486, 9.7701, 9.8776, 9.9846, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.6144, 10.5388, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.0661, 10.9917, 10.9178, 10.8444, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.5470, 11.4765, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What are the most active parts of ctenophora?\nContext: These branch through the mesoglea to the most active parts of the animal: the mouth and pharynx; the roots of the tentacles, if present; all along the underside of each comb row; and four branches round the sensory complex at the far end from the mouth \u2013 two of these four branches terminate in anal pores.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.2785, -2.3163,\n -2.1762, -2.2140, -2.2517, -2.1131, -1.9753, -2.0134, -1.8767, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -1.8527, -1.7218, -1.7592, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.1779, 4.3894, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.3618, 4.2426, 4.1260, 4.3205,\n 4.2060, 4.0937, 4.2844, 4.4721, 4.3614, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.4061, 4.5847, 4.4809, 4.6568, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 5.0332, 4.9346, 5.0990, 5.2615, 5.4222,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.0711,\n 6.9759, 6.8819, 7.0211, 6.9282, 6.8364, 6.9743, 7.1110, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.5967, 7.7268, 7.6376,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.0882, 8.2107, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.4812, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.0631, 9.9846, 9.9067, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.2790, 10.3827, 10.4858, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.0327, 11.9586, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.1867, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who decides the fate of protesters most of the time?\nContext: Brownlee argues, \"Bringing in deterrence at the level of justification detracts from the law\u2019s engagement in a moral dialogue with the offender as a rational person because it focuses attention on the threat of punishment and not the moral reasons to follow this law.\"\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "27.8%", + "z-score": "0.645", + "p value": "0.26", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "50.0%", + "z-score": "2.31", + "p value": "0.0105", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What act sets forth the functions of the Scottish Parliament?\nContext: The Scotland Act 1998, which was passed by the Parliament of the United Kingdom and given royal assent by Queen Elizabeth II on 19 November 1998, governs the functions and role of the Scottish Parliament and delimits its legislative competence.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "35.7%", + "z-score": "3.48", + "p value": "0.000252", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.5785, 1.4907, 1.4045, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.8543, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.7951, 1.7233,\n 1.6524, 1.8257, 1.9973, 2.1669, 2.0954, 2.0247, 1.9548, 2.1213,\n 2.2862, 2.2162, 2.3791, 2.5403, 2.4703, 2.4010, 2.3324, 2.4910,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.2711, 2.4227, 2.3586, 2.2952, 2.4449, 2.5934, 2.7406,\n 2.6768, 2.6135, 2.5508, 2.6961, 2.8402, 2.7775, 2.9202, 3.0619,\n 2.9991, 2.9369, 2.8753, 3.0151, 3.1539, 3.0923, 3.0311, 2.9704,\n 2.9103, 2.8505, 2.7913, 2.7325, 2.6742, 2.8101, 2.9451, 2.8868,\n 2.8288, 2.7713, 2.7143, 2.8475, 2.7906, 2.7341, 2.6781, 2.8098,\n 2.9406, 2.8845, 3.0143, 3.1433, 3.0872, 3.0315, 2.9761, 3.1038,\n 3.2306, 3.1753, 3.1203, 3.0657, 3.0114, 3.1368, 3.0827, 3.0290,\n 2.9756, 3.0997, 3.2230, 3.1696, 3.2921, 3.4140, 3.3606, 3.3075,\n 3.2547, 3.3754, 3.4954, 3.4427, 3.3902, 3.3381, 3.2863, 3.2348,\n 3.1836, 3.1327, 3.0821, 3.2002, 3.3177, 3.2671, 3.3838, 3.5000,\n 3.4494, 3.3990, 3.3489, 3.4641, 3.5787, 3.5286, 3.4788])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "60.5%", + "z-score": "10.6", + "p value": "1.69e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.8458, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 8.0829, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.3320, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 10.1429, 10.0611, 10.1692, 10.2766, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the name for a response of the immune system that damages the body's native tissues?\nContext: Hypersensitivity is an immune response that damages the body's own tissues.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.6086, -0.4237, -0.4815, -0.5388, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.3522, 0.3073, 0.2626, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.6571, 4.5461, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.3026, 4.4809, 4.6568, 4.8305, 5.0019, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.2281, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.4772, 5.6286, 5.5377, 5.4480, 5.3594, 5.2719, 5.1855,\n 5.3345, 5.4822, 5.6285, 5.7735, 5.9172, 5.8310, 5.7457, 5.6614,\n 5.5780, 5.4956, 5.6373, 5.7778, 5.9171, 6.0553, 6.1924, 6.1101,\n 6.0287, 5.9481, 5.8684, 5.7894, 5.9247, 6.0590, 6.1923, 6.3246,\n 6.4558, 6.3770, 6.2990, 6.2217, 6.1451, 6.0693, 6.1990, 6.3278,\n 6.4558, 6.5828, 6.7090, 6.6332, 6.5582, 6.4838, 6.4101, 6.3369,\n 6.4618, 6.5857, 6.7089, 6.8313, 6.9529, 6.8799, 6.8075, 6.7358,\n 6.6645, 6.5939, 6.7143, 6.8339, 6.9529, 7.0711, 7.1886, 7.1181,\n 7.0481, 6.9786, 6.9097, 6.8413, 6.9577, 7.0735, 7.1885, 7.3030,\n 7.4168, 7.3485, 7.2807, 7.2134, 7.1465, 7.0801, 7.1929, 7.3051,\n 7.4167, 7.5277, 7.6381, 7.5719, 7.5061, 7.4407, 7.3758, 7.3113,\n 7.4208, 7.5297, 7.6381, 7.7460, 7.8533, 7.7889, 7.7249, 7.6613,\n 7.5981, 7.5353, 7.6418, 7.7478, 7.8533, 7.9582, 8.0627, 8.0000,\n 7.9377, 7.8758, 7.8142, 7.7530, 7.8567, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did the Warsaw Uprising begin?\nContext: the Polish government-in-exile in London gave orders to the underground Home Army (AK) to try to seize control of Warsaw from the Germans before the Red Army arrived.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "119", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "28.6%", + "z-score": "0.9", + "p value": "0.184", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.0469, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 8.9444, 8.8544,\n 8.9752, 8.8860, 9.0060, 8.9178, 9.0370, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.0453, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.1414, 9.2554, 9.1735, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.2697, 9.3810, 9.4916, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.7224, 9.6456, 9.7526, 9.6764,\n 9.7828, 9.7072, 9.8131, 9.7380, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.9038, 9.8303, 9.9340, 10.0371, 10.1398, 10.0668, 10.1690, 10.0965,\n 10.1981, 10.1262, 10.0547, 10.1558, 10.2565, 10.1855, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: The Tower District is centered around which historic theatre?\nContext: The theater was built in 1939 and is at Olive and Wishon Avenues in the heart of the Tower District.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 2.3238, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.7765, 1.9612, 2.1436, 2.3238,\n 2.5019, 2.6778, 2.5983, 2.5198, 2.4423, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.3126, 2.2393, 2.4077, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.5207, 2.4495, 2.6112, 2.5403, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.2222, 2.3779, 2.5322, 2.4660, 2.4004,\n 2.5527, 2.7037, 2.6381, 2.5731, 2.7222, 2.6575, 2.8051, 2.9515,\n 2.8868, 2.8226, 2.7591, 2.6961, 2.8402, 2.7775, 2.9202, 3.0619,\n 2.9991, 2.9369, 3.0770, 3.0151, 3.1539, 3.2918, 3.2299, 3.1685,\n 3.1076, 3.2437, 3.3789, 3.3181, 3.2577, 3.1977, 3.3314, 3.2717,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.3582, 3.2998, 3.2419, 3.3717,\n 3.5007, 3.4428, 3.3853, 3.5131, 3.4558, 3.3989, 3.3424, 3.2863,\n 3.2306, 3.1753, 3.1203, 3.0657, 3.1912, 3.1368, 3.0827, 3.0290,\n 2.9756, 2.9225, 3.0464, 3.1696, 3.2921, 3.4140, 3.5351, 3.6556,\n 3.6019, 3.5485, 3.4954, 3.4427, 3.3902, 3.3381, 3.2863, 3.4050,\n 3.3534, 3.4713, 3.5887, 3.5370, 3.4857, 3.6021, 3.5509, 3.5000,\n 3.6156, 3.5648, 3.5143, 3.4641, 3.4142, 3.3645, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.9620, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.6086, 5.7719, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 6.8819, 7.0211, 6.9282, 7.0662, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.6867, 7.5967, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 7.9724, 7.8859, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.8631, 8.9815,\n 8.8958, 9.0134, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.4812, 9.3993, 9.5112, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.2630, 11.3608, 11.4581, 11.5549, 11.4806, 11.4068,\n 11.5033, 11.4300, 11.5261, 11.6217, 11.5489, 11.4766, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.4765, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the English translation of Het Scheur?\nContext: The largest and southern main branch begins as Waal and continues as Boven Merwede (\"Upper Merwede\"), Beneden Merwede (\"Lower Merwede\"), Noord River (\"North River\"), Nieuwe Maas (\"New Meuse\"), Het Scheur (\"the Rip\") and Nieuwe Waterweg (\"New Waterway\").\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.8536, -2.8928, -2.9317, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -3.0417, -3.0792,\n -3.1165, -3.1536, -3.1905, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.2925, -3.3282, -3.3637, -3.3989, -3.4340, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.8194, -3.8516, -3.8838, -3.9158,\n -3.7778, -3.6407, -3.6731, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "61.4%", + "z-score": "11.8", + "p value": "1.83e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.0814, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 3.7009, 3.9158, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.3618, 4.2426, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190, 4.9075,\n 4.7980, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 5.9386,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.0201,\n 6.9303, 7.0657, 6.9768, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.5048, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.3324, 8.2483, 8.1650,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.7952, 8.9113,\n 9.0267, 8.9448, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.1410,\n 11.0661, 10.9917, 10.9178, 11.0165, 11.1148, 11.0414, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.8056])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is one of the largest music schools in Europe?\nContext: Other institutions for higher education include the Medical University of Warsaw, the largest medical school in Poland and one of the most prestigious, the National Defence University, highest military academic institution in Poland, the Fryderyk Chopin University of Music the oldest and largest music school in Poland, and one of the largest in Europe, the Warsaw School of Economics, the oldest and most renowned economic university in the country, and the Warsaw University of Life Sciences the largest agricultural university founded in 1818.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.6997, -1.4940, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.1785,\n -0.9966, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.2857, -1.1390, -1.1825, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.0954,\n -0.9555, -0.8165, -0.8592, -0.7213, -0.7641, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.5706, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.5403, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 7.1241, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.8360, 8.0076, 8.1763, 8.3423, 8.1689, 8.3333,\n 8.4953, 8.3283, 8.1654, 8.3267, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.8388, 10.7099, 10.8350, 10.9589, 11.0818, 11.2036, 11.3244,\n 11.1994, 11.3196, 11.4388, 11.5570, 11.6743, 11.5525, 11.6693, 11.7851,\n 11.6656, 11.5476, 11.6632, 11.5470, 11.6620, 11.7762, 11.8896, 11.7757,\n 11.8885, 12.0005, 12.1118, 12.2222, 12.3319, 12.2207, 12.3299, 12.4384,\n 12.5462, 12.6533, 12.5443, 12.4365, 12.5434, 12.6496, 12.7551, 12.8599,\n 12.9641, 13.0677, 13.1707, 13.0656, 13.1681, 13.2701, 13.3714, 13.4722,\n 13.5724, 13.4694, 13.5693, 13.6685, 13.7672, 13.8654, 13.9630, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.2481, 14.1489, 14.2443, 14.3393, 14.2413,\n 14.1442, 14.2390, 14.1429, 14.2374, 14.3314, 14.4250, 14.3302, 14.4234,\n 14.5162, 14.6086, 14.7005, 14.7920, 14.6987, 14.7899, 14.8807, 14.9711,\n 15.0610, 14.9691, 14.8779, 14.9677, 15.0571, 15.1461, 15.2348, 15.3230,\n 15.4108, 15.4983, 15.4087, 15.4959, 15.5828, 15.6692, 15.7553, 15.8411,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.1769, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.3377, 16.4205, 16.5028, 16.4178, 16.3333,\n 16.4156, 16.3318, 16.4139, 16.4957, 16.5772, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What percentage of farmland grows wheat?\nContext: More than 50% of this area is sown for wheat, 33% for barley and 7% for oats.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.0196, -2.0692, -2.1183, -1.9262, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -1.9535, -2.0000, -2.0461, -1.8716, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.3891, -2.4286, -2.4678, -2.5068, -2.3570, -2.3962, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.9853, 8.1176, 8.0212, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.2372, 8.1481, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 9.1927, 9.1094,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.3338, 9.4438, 9.5532,\n 9.4752, 9.3979, 9.3212, 9.2450, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did he light at his laboratories to demonstrate his wireless power transmission?\nContext: He lit electric lamps wirelessly at both locations, demonstrating the potential of wireless power transmission.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "29.9%", + "z-score": "1.47", + "p value": "0.0702", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 1.1926, 1.1088, 1.3198, 1.5275, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 1.0141, 1.1693, 1.3231, 1.4757,\n 1.4171, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488, 1.8956, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.6127, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.2195, 8.0829,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 9.9146, 9.8064, 9.6995, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.3642, 11.2719, 11.1803, 11.2877,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.6827, 11.7851, 11.6990, 11.8010,\n 11.9024, 11.8172, 11.9181, 12.0185, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 12.9244, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where did the Exposition take place?\nContext: This World's Fair devoted a building to electrical exhibits.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-0.94", + "p value": "0.826", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -0.9631, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.3249, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 9.8877, 10.0021,\n 10.1157, 10.0261, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.8426, 10.9497, 10.8631, 10.7772,\n 10.8838, 10.7987, 10.7143, 10.8204, 10.9259, 10.8423, 10.7594, 10.6771,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 10.8984, 11.0004, 10.9220, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.0702, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.6297, 11.7261, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.0529, 11.9792, 12.0731, 12.0000,\n 11.9273, 12.0209, 12.1141, 12.0419, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is a ligand on the cell surface that is upregulated after helper T cell activation?\nContext: In addition, helper T cell activation causes an upregulation of molecules expressed on the T cell's surface, such as CD40 ligand (also called CD154), which provide extra stimulatory signals typically required to activate antibody-producing B cells.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "27.2%", + "z-score": "0.482", + "p value": "0.315", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868, 2.6605, 2.9938,\n 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998, 3.1177, 3.3968, 3.6667,\n 3.9279, 3.7524, 3.5839, 3.8367, 3.6742, 3.5176, 3.7626, 3.6108, 3.4641,\n 3.7017, 3.9337, 3.7905, 3.6515, 3.8772, 4.0980, 4.3142, 4.5260, 4.7336,\n 4.5968, 4.4634, 4.6667, 4.5363, 4.7357, 4.9316, 4.8038, 4.6790, 4.5569,\n 4.4374, 4.6291, 4.8177, 5.0034, 4.8857, 5.0684, 5.2485, 5.1326, 5.3100,\n 5.1962, 5.3709, 5.2590, 5.4312, 5.3211, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.8835, 5.7812, 5.6804, 5.5811,\n 5.4832, 5.3867, 5.2915, 5.1977, 5.1051, 5.2614, 5.4160, 5.5691, 5.4772,\n 5.6286, 5.7785, 5.9270, 5.8358, 5.7458, 5.6569, 5.5690, 5.7155, 5.6285,\n 5.7735, 5.9172, 6.0596, 6.2008, 6.1143, 6.2541, 6.1685, 6.0838, 6.0000,\n 5.9171, 6.0553, 5.9732, 6.1101, 6.0287, 6.1644, 6.0837, 6.0038, 6.1382,\n 6.2716, 6.1923, 6.3246, 6.2459, 6.3770, 6.2990, 6.2217, 6.3517, 6.2750,\n 6.1990, 6.1237, 6.0491, 5.9752, 5.9019, 6.0302, 6.1577, 6.0848, 6.2113,\n 6.1389, 6.0671, 5.9960, 6.1213, 6.2458, 6.3694, 6.2985, 6.2282, 6.1584,\n 6.0892, 6.2116, 6.3333, 6.2644, 6.3853, 6.3168, 6.4368, 6.5561, 6.4880,\n 6.6064, 6.7242, 6.6564, 6.5891, 6.7060, 6.8222, 6.9378, 7.0527, 6.9856,\n 7.0998, 7.0330, 6.9667, 7.0801, 7.0142, 7.1270, 7.0614, 6.9964, 7.1083,\n 7.0436, 6.9793, 6.9155, 6.8520, 6.9631, 7.0736, 7.0104, 7.1203, 7.0574,\n 7.1667, 7.2753, 7.2127, 7.3208, 7.4283, 7.3660, 7.3041, 7.4109, 7.5173,\n 7.6231, 7.7285, 7.6667, 7.7715, 7.7099, 7.6488, 7.7530, 7.6922, 7.7958,\n 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did Warsaw become the center of the Congress Poland?\nContext: Warsaw remained the capital of the Polish\u2013Lithuanian Commonwealth until 1796, when it was annexed by the Kingdom of Prussia to become the capital of the province of South Prussia.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.1111, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -2.8808, -2.9314, -2.9814, -3.0308, -3.0796, -2.8368, -2.8868,\n -2.9361, -2.9848, -3.0330, -3.0806, -2.8497, -2.8983, -2.9463, -2.9938,\n -3.0408, -3.0873, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -3.1434, -3.1879, -3.2320, -3.2757, -3.3190, -3.1129, -3.1568, -3.2004,\n -3.2435, -3.2863, -3.0867, -3.1300, -3.1730, -3.2157, -3.2579, -3.2998,\n -3.1069, -3.1493, -3.1914, -3.2332, -3.2746, -3.3156, -3.1288, -3.1704,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.4915,\n -3.5303, -3.3526, -3.3918, -3.4308, -3.4694, -3.5079, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.4879, -3.5256, -3.5631, -3.6004, -3.6374, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.4171, -3.4543, -3.4913, -3.5280, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.5494, -3.5853, -3.6210, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.4170, -3.4528, -3.4884, -3.3359, -3.3717,\n -3.4073, -3.4428, -3.2925, -3.3282, -3.3637, -3.3989, -3.4340, -3.4689,\n -3.5036, -3.5382, -3.5725, -3.4263, -3.4609, -3.4953, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.3113, -3.3457, -3.3799, -3.2389, -3.2733, -3.3075,\n -3.3415, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.3354, -3.3686, -3.4017, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 2.4495, 2.1170,\n 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321, 1.5403, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142, 1.2702, 1.1323, 1.0000,\n 1.3093, 1.1793, 1.4757, 1.7628, 2.0412, 2.3116, 2.5744, 2.8301, 3.0792,\n 2.9424, 3.1844, 3.0509, 3.2863, 3.1558, 3.3853, 3.6098, 3.8297, 3.7009,\n 3.9158, 4.1265, 4.3333, 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712,\n 5.0602, 4.9377, 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 4.8394, 4.7281,\n 4.6188, 4.5115, 4.6904, 4.8669, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.4610, 5.6220, 5.7812, 5.9386, 6.0943,\n 5.9932, 5.8936, 6.0474, 5.9491, 5.8522, 6.0041, 5.9084, 5.8139, 5.7207,\n 5.8707, 5.7785, 5.9270, 6.0740, 5.9827, 5.8926, 6.0380, 5.9488, 6.0927,\n 6.0044, 6.1470, 6.0596, 5.9732, 5.8878, 5.8034, 5.7199, 5.8605, 6.0000,\n 6.1383, 6.2755, 6.4116, 6.3283, 6.4632, 6.3807, 6.5144, 6.4327, 6.5653,\n 6.6968, 6.6157, 6.5354, 6.4558, 6.3770, 6.2990, 6.2217, 6.1451, 6.2750,\n 6.4039, 6.3278, 6.2524, 6.1777, 6.3054, 6.4322, 6.3580, 6.4838, 6.6088,\n 6.7330, 6.8564, 6.9789, 7.1007, 7.2217, 7.3419, 7.2675, 7.1938, 7.3131,\n 7.2399, 7.1673, 7.2857, 7.2136, 7.1421, 7.0711, 7.1886, 7.1181, 7.2348,\n 7.3508, 7.2807, 7.2111, 7.3263, 7.4409, 7.5548, 7.4855, 7.4168, 7.3485,\n 7.2807, 7.2134, 7.1465, 7.2594, 7.3717, 7.4833, 7.5944, 7.7048, 7.8147,\n 7.9241, 8.0328, 7.9659, 7.8995, 8.0076, 7.9415, 7.8759, 7.9833, 7.9181,\n 7.8533, 7.7889, 7.8956, 7.8316, 7.9377, 8.0433, 7.9796, 7.9162, 8.0212,\n 8.1258, 8.2298, 8.1667, 8.1039, 8.0416, 7.9796, 7.9179, 7.8567, 7.9599,\n 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which country does the Rhine encounter it's main tributaries?\nContext: It is here that the Rhine encounters some more of its main tributaries, such as the Neckar, the Main and, later, the Moselle, which contributes an average discharge of more than 300 m3/s (11,000 cu ft/s).\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.8667, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.9497, 0.9017, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.4065, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.7416, 6.6454, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.5556, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.4868, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.5607, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.9783, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.2697, 9.3810, 9.4916, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.7224, 9.6456, 9.5695, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.4837, 9.4103, 9.5161, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.7886, 9.8918, 9.8198, 9.9224,\n 9.8510, 9.7800, 9.8821, 9.9837, 9.9132, 9.8431, 9.9442, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.1058, 10.2050, 10.1363, 10.2350, 10.1667,\n 10.0987, 10.1970, 10.2949, 10.3923, 10.3248, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: If two thirds of the Rhine flows through Waal, where does the other third flow through?\nContext: The other third of the water flows through the Pannerdens Kanaal and redistributes in the IJssel and Nederrijn.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "16.2%", + "z-score": "-2.56", + "p value": "0.995", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.5759, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.2151, -2.2629, -2.3102, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.2186, -2.2646,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.9762, -3.0151, -2.8536, -2.8928, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.6576, -2.6960, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.5506, -2.5886, -2.6264, -2.6640, -2.5183, -2.5560])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.7277, 8.8626,\n 8.7515, 8.8853, 8.7758, 8.6678, 8.8007, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.5448, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.4080, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.6487, 11.5655, 11.4829, 11.4009, 11.3196,\n 11.2389, 11.1588, 11.0793, 11.0004, 11.1018, 11.2028, 11.1245, 11.0468,\n 11.1473, 11.0702, 10.9936, 10.9176, 10.8421, 10.7671, 10.6927, 10.6187,\n 10.5453, 10.4724, 10.3999, 10.3280, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.1450, 10.0753, 10.0061, 9.9374, 9.8691, 9.8012, 9.7337, 9.6667,\n 9.6000, 9.5338, 9.4680, 9.4026, 9.3375, 9.2729, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What paved the way for the Augsburg Confession?\nContext: Despite the disagreements on the Eucharist, the Marburg Colloquy paved the way for the signing in 1530 of the Augsburg Confession, and for the formation of the Schmalkaldic League the following year by leading Protestant nobles such as John of Saxony, Philip of Hesse, and George, Margrave of Brandenburg-Ansbach.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.1437, -1.9064, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.1185, -0.9152, -0.7145, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.9631, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.7095, 0.6600, 0.6108, 0.7493,\n 0.8868, 1.0235, 0.9739, 0.9245, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.0338, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.9704, 2.8301, 3.0792, 3.3221, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.0937, 4.2844, 4.1740, 4.3614, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.5997,\n 6.5069, 6.4153, 6.5569, 6.6973, 6.6066, 6.7456, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.5912, 7.5048, 7.4193, 7.3346, 7.2508, 7.3786,\n 7.2956, 7.2134, 7.1319, 7.0513, 6.9714, 7.0980, 7.2236, 7.3485,\n 7.2691, 7.1904, 7.3143, 7.4373, 7.3592, 7.4813, 7.4039, 7.5251,\n 7.6456, 7.7653, 7.8842, 7.8072, 7.9253, 7.8489, 7.7732, 7.8905,\n 7.8153, 7.7407, 7.6667, 7.7831, 7.8988, 7.8253, 7.7524, 7.8673,\n 7.9816, 7.9091, 8.0227, 7.9507, 7.8793, 7.8084, 7.7380, 7.8507,\n 7.9628, 8.0742, 8.0042, 7.9347, 7.8657, 7.9764, 7.9078, 7.8397,\n 7.7720, 7.8820, 7.9913, 8.1001, 8.0328, 7.9659, 7.8995, 8.0076,\n 7.9415, 7.8759, 7.8107, 7.7460, 7.8533, 7.7889, 7.8956, 7.8316,\n 7.7679, 7.7047, 7.6418, 7.7478, 7.8533, 7.7907, 7.8956, 7.8333,\n 7.9377, 8.0416, 7.9796, 7.9179, 7.8567, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What signer of the Articles of Confederation was descended from Huguenots?\nContext: Paul Revere was descended from Huguenot refugees, as was Henry Laurens, who signed the Articles of Confederation for South Carolina; Jack Jouett, who made the ride from Cuckoo Tavern to warn Thomas Jefferson and others that Tarleton and his men were on their way to arrest him for crimes against the king; Francis Marion, and a number of other leaders of the American Revolution and later statesmen.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.4021, 0.5717, 0.5120, 0.6794,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.3246, -0.3698, -0.2304, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.2234, 0.3563,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.9461, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 4.9528, 4.8394, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.8958, 8.8108, 8.9285, 9.0453, 9.1615, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.5577, 9.6635, 9.7688, 9.8736, 9.9778,\n 9.9038, 10.0074, 9.9340, 9.8611, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What language did Tesla study while in school?\nContext: Tesla was the fourth of five children.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.7595, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 5.7155, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.1143, 6.3333, 6.1101, 5.8966, 6.1137, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.5924, 6.4273, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.4017, 8.5491, 8.4188, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.3086, 9.4425, 9.5751, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.1124, 10.2375, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.2623, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.1860, 11.0883, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.5645, 11.4714, 11.3791, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.7543, 11.8571, 11.9594, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.9116, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.1491, 13.2419, 13.1617, 13.2542, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.4744, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.6789, 13.7679, 13.6914, 13.7801, 13.8683, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what year did the university first see a drop in applications?\nContext: In the early 1950s, student applications declined as a result of increasing crime and poverty in the Hyde Park neighborhood.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-0.997", + "p value": "0.841", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -1.7823, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.3926, -1.4631, -1.1918, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.0719, -1.1255, -0.9428,\n -0.9966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 6.8483, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 8.0741,\n 7.9839, 7.8948, 7.8065, 7.9336, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.1731, 8.2956, 8.4173, 8.5381, 8.4532, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.7388, 8.8527,\n 8.9660, 9.0786, 9.0000, 9.1119, 9.0340, 9.1452, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.3212, 9.2450, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.6532, 9.5808, 9.6850, 9.7886, 9.8918, 9.8198, 9.9224,\n 9.8510, 9.9531, 9.8821, 9.9837, 10.0848, 10.1855, 10.1149, 10.0448,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.2050, 10.3038, 10.2350, 10.3333,\n 10.2650, 10.3628, 10.4603, 10.5573, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What causes the symptoms of inflammation?\nContext: The symptoms of inflammation are redness, swelling, heat, and pain, which are caused by increased blood flow into tissue.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "28.1%", + "z-score": "0.913", + "p value": "0.181", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.8520, 1.0289, 0.9631, 1.1375, 1.3101, 1.4809, 1.4142,\n 1.3483, 1.5164, 1.4506, 1.6166, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.0284, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 7.8113, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.9455, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.2589, 11.1761, 11.2789, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.7000, 11.7980, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.3163, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.2767, 12.3705, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.7756, 12.7017, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In which case did the Court state that Austria was not allowed to hold places in Austrian schools exclusively for Austrian students?\nContext: In Commission v Austria the Court held that Austria was not entitled to restrict places in Austrian universities to Austrian students to avoid \"structural, staffing and financial problems\" if (mainly German) foreign students applied for places because there was little evidence of an actual problem.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 1.1628, 1.3389, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.3517, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.6710, 1.6160, 1.5614, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.2943, 1.4284, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.5423, 1.4923, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.4770, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.3222, 11.2268,\n 11.1324, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.8151, 11.9187, 11.8287, 11.9319,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.3629,\n 12.4625, 12.3754, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.7073, 12.6234, 12.7199, 12.8160, 12.9116, 13.0067,\n 12.9238, 13.0185, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.4150, 13.5069, 13.4263, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.5526, 13.6429, 13.7327, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 13.9332, 14.0214, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What type of flower is sought on Midsummer's Eve?\nContext: Each Midsummer\u2019s Eve, apart from the official floating of wreaths, jumping over fires, looking for the fern flower, there are musical performances, dignitaries' speeches, fairs and fireworks by the river bank.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.7256, 0.8889, 0.8295, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.0284, 1.1711, 1.3128, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 0.9897, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 6.9646, 6.8419, 6.7213, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.7629, 8.6667, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.4738, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What are those from Jacksonville sometimes called?\nContext: People from Jacksonville may be called \"Jacksonvillians\" or \"Jaxsons\" (also spelled \"Jaxons\").\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.8543, 1.7765, 1.6997, 1.8838, 1.8074,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 2.0247, 1.9548, 2.1213,\n 2.0517, 1.9829, 1.9149, 1.8475, 1.7809, 1.7150, 1.8773, 1.8116,\n 1.9720, 2.1309, 2.2884, 2.2222, 2.1567, 2.3120, 2.2468, 2.4004,\n 2.3354, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 2.1700, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.4271, 2.3660, 2.5103, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.2943, 2.2361, 2.3764,\n 2.5156, 2.6540, 2.5954, 2.5373, 2.6742, 2.6163, 2.7520, 2.6943,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.3805, 2.5123, 2.4578, 2.5886, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.2680, 2.3967, 2.3443, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.2650, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.3868, 2.5099, 2.4597, 2.4099, 2.3603, 2.3110, 2.2620, 2.2133,\n 2.3346, 2.2860, 2.4065, 2.5265, 2.4778, 2.5969, 2.5483, 2.6667,\n 2.6182, 2.5700, 2.5220, 2.6393, 2.7560, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.7%", + "z-score": "13.5", + "p value": "4.54e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 5.6569,\n 5.4271, 5.6614, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.5337, 8.4270, 8.3217, 8.2178, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 10.8224, 10.7349, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.3616, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.6029, 11.7031, 11.8028, 11.9020, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.1329, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.3615, 13.4510, 13.5401])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When was amnesty granted to those responsible for the massacre?\nContext: The exact number of fatalities throughout the country is not known.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.3244, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.2907, 0.2414, 0.3849,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.3073, 0.4377, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "58.9%", + "z-score": "6.69", + "p value": "1.12e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415, 4.3409, 4.6268,\n 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.2426, 4.0415, 4.3027, 4.5556,\n 4.8008, 5.0389, 5.2705, 5.4958, 5.3072, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.6000, 5.4322, 5.6395, 5.8424, 5.6805, 5.8797, 6.0751, 6.2668, 6.4550,\n 6.2993, 6.4846, 6.6667, 6.5158, 6.3687, 6.2251, 6.0849, 6.2651, 6.1283,\n 6.3058, 6.4807, 6.6531, 6.8229, 6.6896, 6.5591, 6.4312, 6.3058, 6.4738,\n 6.6395, 6.8031, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.6898])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: A function problem is an example of what?\nContext: Notable examples include the traveling salesman problem and the integer factorization problem.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 1.1547, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.9711, 1.8791, 2.0870, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.4163, 2.3276, 2.5205, 2.4327, 2.3462, 2.5352, 2.7217,\n 2.6354, 2.8189, 2.7333, 2.9140, 2.8292, 2.7456, 2.6632, 2.8402,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.6928, 2.6148, 2.7854, 2.7080,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.4286, 2.3570,\n 2.2862, 2.2162, 2.3791, 2.3094, 2.2405, 2.4010, 2.5600, 2.4910,\n 2.4228, 2.3552, 2.5117, 2.4444, 2.3779, 2.5322, 2.6852, 2.6186,\n 2.7699, 2.7037, 2.8534, 2.7875, 2.7222, 2.8701, 3.0168, 2.9515,\n 2.8868, 2.8226, 2.9673, 2.9035, 2.8402, 2.9832, 3.1251, 3.0619,\n 3.2025, 3.1395, 3.2788, 3.2161, 3.1539, 3.2918, 3.4286, 3.3665,\n 3.3049, 3.2437, 3.3789, 3.3181, 3.2577, 3.3915, 3.5245, 3.4641,\n 3.4042, 3.3447, 3.4762, 3.4170, 3.3582, 3.4884, 3.6178, 3.5590,\n 3.5007, 3.4428, 3.5708, 3.5131, 3.4558, 3.5827, 3.7087, 3.6515,\n 3.5946, 3.5382, 3.6629, 3.6067, 3.5508, 3.6745, 3.7975, 3.7417,\n 3.6862, 3.6310, 3.7528, 3.6979, 3.6433, 3.7641, 3.8843, 3.8297,\n 3.7755, 3.7216, 3.8406, 3.7869, 3.7335, 3.8516, 3.9691, 3.9158,\n 3.8627, 3.8100, 3.9265, 3.8739, 3.8216, 3.9372, 4.0522, 4.0000,\n 3.9481, 3.8964, 4.0105, 3.9590, 3.9078, 4.0210, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.7823, 2.0494, 1.9245, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.8431, 4.0446, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.0937, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 4.9075,\n 5.0844, 4.9747, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.2483, 6.1471, 6.0474, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.2372, 8.1481, 8.0598, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.0910, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.3617, 10.4638, 10.3893, 10.4909, 10.4170, 10.5181, 10.4447,\n 10.5453, 10.4724, 10.3999, 10.3280, 10.4281, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.8505, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.0961, 11.0261, 10.9564, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what year did Tesla's family move to Gospic?\nContext: Tesla was the fourth of five children.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "169", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "29.0%", + "z-score": "1.2", + "p value": "0.115", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.4444, 0.6083, 0.5505, 0.4932, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.9979, 1.1305, 1.0820, 1.0338, 0.9858, 1.1169, 1.0690,\n 1.1991])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "148", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "75.7%", + "z-score": "14.2", + "p value": "2.69e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 5.9530, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.1583, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.4770, 10.3615, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.3289, 12.4365, 12.5434, 12.6496, 12.5434, 12.6491,\n 12.7542, 12.6495, 12.7542, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.1657, 13.2669, 13.3675, 13.4675, 13.3667, 13.4664, 13.5655, 13.6640,\n 13.5647, 13.6630, 13.7606, 13.8578, 13.9544, 13.8567, 13.9531, 14.0489,\n 13.9524, 14.0479, 14.1429, 14.2374])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did Carolina face in the opening drive that they had not faced the entire postseason?\nContext: A pair of carries by C. J. Anderson moved the ball up 20 yards to the Panthers 14-yard line, but Carolina's defense dug in over the next three plays.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -0.8374, -0.8811, -0.7396, -0.7833, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.2146, -0.2568, -0.1280, 0.0000,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.2169,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 7.0763,\n 6.9601, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.7026, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.5331, 9.4327, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.0562, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.7226, 12.6439, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.6667,\n 13.5897, 13.6789, 13.7679, 13.6914, 13.7801, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was the mayor of San Francisco during Super Bowl 50?\nContext: San Francisco mayor Ed Lee said of the highly visible homeless presence in this area \"they are going to have to leave\".\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.8791, 2.0870, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.1320, 2.0455, 2.2404, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.3842, 2.3028, 2.2226, 2.4034, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 2.1167, 2.2902, 2.2156,\n 2.3868, 2.3126, 2.4814, 2.6485, 2.8138, 2.7393, 2.6656, 2.5927,\n 2.5207, 2.6828, 2.8433, 2.7713, 2.9299, 2.8583, 2.7875, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.6667, 2.5991, 2.5322, 2.4660, 2.4004,\n 2.5527, 2.4874, 2.4227, 2.5731, 2.7222, 2.8701, 2.8051, 2.7406,\n 2.8868, 2.8226, 2.7591, 2.6961, 2.6336, 2.5717, 2.7153, 2.8577,\n 2.7958, 2.9369, 3.0770, 3.0151, 2.9537, 3.0923, 3.2299, 3.1685,\n 3.1076, 3.0471, 2.9872, 2.9277, 2.8687, 2.8101, 2.9451, 2.8868,\n 3.0206, 3.1536, 3.0952, 3.0373, 2.9798, 3.1113, 3.2419, 3.1844,\n 3.3140, 3.4428, 3.3853, 3.3282, 3.4558, 3.5827, 3.5256, 3.4689,\n 3.4126, 3.5382, 3.6629, 3.6067, 3.5508, 3.4953, 3.4401, 3.3853,\n 3.5085, 3.6310, 3.5762, 3.5218, 3.6433, 3.5890, 3.5351, 3.4816,\n 3.4283, 3.3754, 3.3228, 3.4427, 3.5619, 3.5093, 3.6277, 3.5753,\n 3.6929, 3.8100, 3.9265, 3.8739, 3.8216, 3.7697, 3.7180, 3.8333,\n 3.9481, 3.8964, 4.0105, 3.9590, 3.9078, 4.0210, 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 7.8512, 8.0139, 7.8628, 7.7152,\n 7.8766, 7.7326, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.6667, 10.7835, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.5950,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 12.8095, 12.9085, 13.0071, 12.9165, 12.8267, 12.9249,\n 12.8359, 12.9337, 12.8456, 12.9430, 13.0400, 12.9527, 12.8661, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 13.0903, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.4150, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What day of the week did Shark Tank debut on?\nContext: Shark Tank (based on the Dragon's Den reality format) also became a midseason sleeper hit on Sundays in the spring of 2010; the following season, it became the tentpole of the network's Friday night schedule, gradually helping make ABC a strong competitor (after being paired with 20/20 and beginning with the 2012\u201313 season, the Tim Allen sitcom Last Man Standing) against CBS' long-dominant drama/reality lineup on that night for the first time since the \"TGIF\" lineup ended in 2000.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.8729, 1.1793, 1.0541, 1.3480, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, -0.1721, -0.0429, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.4678, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.5591, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 11.9737, 12.0749, 12.1756,\n 12.0891, 12.1893, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.6367, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.3060, 13.3967, 13.4871, 13.4100, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How much of Jacksonville is made up of water?\nContext: According to the United States Census Bureau, the city has a total area of 874.3 square miles (2,264 km2), making Jacksonville the largest city in land area in the contiguous United States; of this, 86.66% (757.7 sq mi or 1,962 km2) is land and ; 13.34% (116.7 sq mi or 302 km2) is water.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "148", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "52.0%", + "z-score": "7.59", + "p value": "1.56e-14", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570, 2.1939, 2.0381, 2.3333,\n 2.6186, 2.8947, 3.1623, 3.0072, 3.2660, 3.5176, 3.3665, 3.6108, 3.4641,\n 3.3221, 3.1844, 3.4207, 3.6515, 3.8772, 3.7417, 3.9620, 4.1779, 4.3894,\n 4.5968, 4.8003, 5.0000, 5.1962, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.6830, 5.8635, 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557,\n 7.2169, 7.3760, 7.5331, 7.4061, 7.2815, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 6.9714, 7.1207,\n 7.2684, 7.1611, 7.0553, 6.9511, 6.8483, 6.7469, 6.6469, 6.7931, 6.6944,\n 6.5970, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354, 6.7414, 6.6486, 6.5569,\n 6.6973, 6.6066, 6.5169, 6.4283, 6.3408, 6.2541, 6.1685, 6.0838, 6.2222,\n 6.1383, 6.2755, 6.1924, 6.3283, 6.4632, 6.3807, 6.5144, 6.4327, 6.3517,\n 6.4842, 6.4040, 6.5354, 6.6658, 6.7952, 6.7155, 6.8439, 6.9714, 7.0980,\n 7.0187, 7.1443, 7.0658, 6.9879, 7.1125, 7.0353, 7.1590, 7.2818, 7.4039,\n 7.3271, 7.2510, 7.1755, 7.2966, 7.2217, 7.3419, 7.4613, 7.5800, 7.5056,\n 7.4317, 7.5495, 7.4762, 7.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What museum preserves the memory of the crime?\nContext: A fine tribute to the fall of Warsaw and history of Poland can be found in the Warsaw Uprising Museum and in the Katy\u0144 Museum which preserves the memory of the crime.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "174", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "25.3%", + "z-score": "0.0875", + "p value": "0.465", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.1761, 0.1317, 0.0875])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "60.0%", + "z-score": "7.45", + "p value": "4.59e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660, 3.6566,\n 4.0166, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188, 4.3409, 4.6268,\n 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140, 4.5033, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.0811, 4.8990, 5.1257, 5.3468, 5.1723, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.4772, 5.3199, 5.1671, 5.3708, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.3333, 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568,\n 6.9286, 7.0980, 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.1857, 7.0557,\n 6.9282, 7.0895, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460, 7.6339,\n 7.5234, 7.4146, 7.3073, 7.4521])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When people take on debt, it leads potentially to what?\nContext: If the state does not provide these services, then for those on lower incomes, the costs must be borrowed and often those on lower incomes are those who are worse equipped to manage their finances.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.7150, 1.8773, 1.8116,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.6271, 1.7772, 1.9261, 1.8665, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.7522, 1.6958, 1.6398, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.7018, 1.8411, 1.9795, 1.9245,\n 2.0617, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.8490, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.7746, 1.9052, 2.0350, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.8875, 1.8383, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.8453, 1.9686, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.0212, 1.9738, 1.9267, 1.8799, 1.8333,\n 1.9533, 1.9068, 1.8605, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 7.9196, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.0756, 10.9898, 10.9048, 11.0102, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.0940, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.3935, 12.3163, 12.2397, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.4818, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Stretched rocks that pinch into lenses are known by what word?\nContext: These stretched rocks can also pinch into lenses, known as boudins, after the French word for \"sausage\", because of their visual similarity.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.5560,\n -2.6148, -2.6726, -2.3772, -2.0889, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -0.8785, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.3127, -0.1782,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.7", + "p value": "2.89e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.9795, 2.2133, 2.4422, 2.3333,\n 2.5568, 2.4495, 2.6679, 2.8823, 2.7757, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.5642, 3.7528,\n 3.9386, 4.1219, 4.0205, 4.2008, 4.3788, 4.2784, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.0332, 4.9346, 4.8375, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.6973, 6.6066, 6.7456, 6.8834, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.5183, 12.6102, 12.7017])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was influential in promoting the use of chemical compounds as medicines?\nContext: Living in the 10th century, he wrote The foundations of the true properties of Remedies, amongst others describing arsenious oxide, and being acquainted with silicic acid.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "69", + "Fraction of T in Greenlist": "34.7%", + "z-score": "3.15", + "p value": "0.000812", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.0290, 2.9055, 2.7852, 2.6681, 2.5538, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.3706, 2.2743, 2.4804, 2.6833, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.5064, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.6164, 1.7865, 1.9548, 2.1213,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.0107, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 1.9920, 2.1442, 2.0817, 2.2323, 2.1700, 2.3190,\n 2.2569, 2.4045, 2.5508, 2.4887, 2.6336, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.5318, 2.6735, 2.6131, 2.5532, 2.4938, 2.4348, 2.3764,\n 2.3183, 2.2608, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.1980, 2.1429, 2.2780, 2.2230, 2.1685, 2.3022, 2.4351,\n 2.3805, 2.3262, 2.4578, 2.4037, 2.3500, 2.4803, 2.6099, 2.7386,\n 2.8666, 2.8124, 2.9394, 2.8853, 2.8316, 2.7783, 2.9040, 3.0290,\n 2.9756, 2.9225, 2.8698, 2.8174, 2.7654, 2.7137, 2.6623, 2.7852,\n 2.9076, 2.8561, 2.8050, 2.7541, 2.8752, 2.8245, 2.9448, 2.8943,\n 3.0138, 2.9633, 3.0821, 3.2002, 3.1497, 3.2671, 3.2167, 3.1667,\n 3.1169, 3.0674, 3.1836, 3.2991, 3.2496, 3.2004, 3.1514])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.6630, 9.5304, 9.4000, 9.5366, 9.6719, 9.8058, 9.6786, 9.8116,\n 9.9433, 10.0737, 9.9495, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.9585, 11.0780,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.6620, 11.5492,\n 11.4378, 11.5519, 11.6652, 11.5556, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.4675, 11.3644, 11.4759, 11.5866, 11.4849, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.6139, 11.7222, 11.8299, 11.9370, 11.8392,\n 11.9457, 11.8491, 11.7533, 11.8594, 11.7647, 11.6709, 11.7766, 11.8818,\n 11.9863, 11.8937, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.9319,\n 11.8427, 11.9455, 12.0476, 11.9594, 11.8719, 11.7851, 11.8870, 11.8010,\n 11.9024, 11.8172, 11.9181, 11.8336, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.9669, 11.8846, 11.8028, 11.7217, 11.6412, 11.7405, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.8176, 11.9147, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.0493, 12.1447, 12.0685, 12.1635, 12.2581,\n 12.3523, 12.4460, 12.5394, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.9410, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When did Tesla move to New York City?\nContext: In 1882, Tesla began working for the Continental Edison Company in France, designing and making improvements to electrical equipment.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -2.5166, -2.0656, -2.1418, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.3522, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.5864, -0.6266, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "159", + "Fraction of T in Greenlist": "79.9%", + "z-score": "17.9", + "p value": "7.69e-72", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 7.1358, 7.3271, 7.1241, 7.3131, 7.4983, 7.6800, 7.8583, 8.0333,\n 8.2052, 8.0178, 8.1882, 8.3557, 8.1763, 8.3423, 8.5057, 8.6667,\n 8.8252, 8.9815, 9.1355, 8.9672, 9.1201, 9.2710, 9.1084, 9.2582,\n 9.4060, 9.5520, 9.6962, 9.8387, 9.9795, 9.8254, 9.9653, 10.1036,\n 9.9540, 10.0915, 10.2275, 10.3621, 10.4952, 10.6270, 10.7575, 10.6145,\n 10.7442, 10.8727, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.4829, 11.3489, 11.4704, 11.5909, 11.4599, 11.5799, 11.6988, 11.8168,\n 11.9338, 12.0499, 12.1651, 12.0386, 12.1533, 12.2671, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.6909, 12.7998, 12.9080,\n 12.7900, 12.8978, 13.0048, 13.1111, 13.2167, 13.3217, 13.4259, 13.3113,\n 13.4152, 13.5185, 13.4057, 13.5086, 13.6109, 13.7125, 13.8136, 13.9140,\n 14.0139, 13.9040, 14.0036, 14.1025, 13.9944, 14.0930, 14.1911, 14.2887,\n 14.3857, 14.4822, 14.5781, 14.4725, 14.5682, 14.6634, 14.5593, 14.6542,\n 14.7486, 14.8425, 14.9359, 15.0289, 15.1213, 15.0195, 15.1118, 15.2036,\n 15.1031, 15.1946, 15.2857, 15.3764, 15.4666, 15.5563, 15.6457, 15.5473,\n 15.6365, 15.7252, 15.6280, 15.7165, 15.8046, 15.8923, 15.9796, 16.0665,\n 16.1531, 16.0578, 16.1441, 16.2301, 16.1358, 16.2216, 16.3070, 16.3920,\n 16.4767, 16.5610, 16.6450, 16.5525, 16.6363, 16.7197, 16.6282, 16.7115,\n 16.7944, 16.8770, 16.9592, 17.0411, 17.1227, 17.0328, 17.1143, 17.1954,\n 17.1064, 17.1873, 17.2680, 17.3483, 17.4284, 17.5081, 17.5875, 17.5000,\n 17.5793, 17.6583, 17.5716, 17.6504, 17.7290, 17.8072, 17.8852])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the population of the second largest city in California?\nContext: Many of southern California's most developed cities lie along or in close proximity to the coast, with the exception of San Bernardino and Riverside.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.6307, 9.5400, 9.6566, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 12.0493, 11.9730, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What has a Lama determined to do?\nContext: In Tibetan Buddhism the teachers of Dharma in Tibet are most commonly called a Lama.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.548", + "p value": "0.708", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.3797, -0.4257, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 6.7543, 6.9570, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.0018, 8.1689, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.1684, 8.3281, 8.4856, 8.3324,\n 8.4884, 8.3391, 8.1929, 8.0498, 8.2054, 8.3589, 8.2195, 8.3716,\n 8.2353, 8.1016, 8.2525, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.3555, 11.4675, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.7130, 11.8212, 11.9288, 12.0357, 12.1419, 12.0433,\n 12.1491, 12.2541, 12.1568, 12.2615, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.3985, 12.5001, 12.4081, 12.3168,\n 12.2263, 12.3277, 12.4286, 12.3391, 12.4395, 12.3508, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.8160, 12.7329, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.6313, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the idealized value of imperialism?\nContext: For some, imperialism designated a policy of idealism and philanthropy; others alleged that it was characterized by political self-interest, and a growing number associated it with capitalist greed.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132,\n 6.8641, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 7.9079, 8.0370, 8.1651, 8.0741,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.2733, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.2588, 8.3813, 8.5030, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.5249, 8.6433, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.9660, 8.8874, 9.0000, 8.9221, 9.0340, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.5485, 9.4761, 9.5808, 9.5089, 9.6130, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.6394, 9.7415, 9.8431, 9.7735, 9.8746,\n 9.8054, 9.9060, 9.8373, 9.9374, 10.0371, 9.9687, 9.9008, 10.0000,\n 9.9325, 10.0312, 10.1295, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What kind of water body is rumored to be obscuring Genghis Khan's burial site?\nContext: Folklore says that a river was diverted over his grave to make it impossible to find (the same manner of burial as the Sumerian King Gilgamesh of Uruk and Atilla the Hun).\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.5143, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.3333, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, 0.0000, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.4721, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.0000, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 6.8419, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.6102, 8.7327, 8.6436,\n 8.5553, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.3024,\n 10.4087, 10.3284, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.3032, 11.2250,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.2126, 11.1392, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.1164, 11.0450, 11.1415, 11.0705, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who won the Ekstraklasa Championship in 2000?\nContext: They also won the country\u2019s championship in 1946, and won the cup twice as well.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "44", + "# Tokens in Greenlist": "6", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-1.74", + "p value": "0.959", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 6.9076, 7.0553, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.6210, 7.7555, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 7.9079, 8.0370, 7.9460, 7.8558,\n 7.9839, 8.1111, 8.2372, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.7908, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.0631, 10.1695, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.4909, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.1164, 11.0450, 11.1415, 11.2376, 11.3333,\n 11.4286, 11.3577, 11.2872, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What were the Saxon Palace and Br\u00fchl Palace in prewar Warsaw?\nContext: Warsaw\u2019s municipal government authorities have decided to rebuild the Saxon Palace and the Br\u00fchl Palace, the most distinctive buildings in prewar Warsaw.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 1.0319, 0.9733, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.2060, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.1711, 1.3128, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.4284, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.3443, 1.2956, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.7310, 1.8527, 1.9738, 2.0943, 2.2141, 2.1667,\n 2.1195, 2.0726, 2.0259, 1.9795, 1.9333, 2.0515, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.7006, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.0060, 8.9178, 9.0370, 9.1553, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.7671, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.4891,\n 11.4132, 11.3378, 11.4356, 11.5329, 11.6297, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.9380, 11.8638, 11.9586, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What thesis specifies that a polynomial relationship exists within time complexities in a computational model?\nContext: This forms the basis for the complexity class P, which is the set of decision problems solvable by a deterministic Turing machine within polynomial time.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.8489, 1.7408, 1.6353, 1.8728, 2.1054, 2.3333,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.4163, 2.6098, 2.8006, 2.7107, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.9140, 3.0924, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.8764, 2.7995, 2.7235, 2.8893, 3.0533, 2.9775, 2.9025, 3.0641,\n 2.9897, 2.9161, 2.8433, 2.7713, 2.7001, 2.8583, 2.7875, 2.7175,\n 2.8735, 2.8039, 2.9582, 3.1111, 3.0415, 2.9726, 2.9044, 3.0551,\n 2.9872, 3.1363, 3.0688, 3.0019, 3.1492, 3.2953, 3.2285, 3.1623,\n 3.3066, 3.4499, 3.5920, 3.7330, 3.6664, 3.6004, 3.5350, 3.6742,\n 3.6091, 3.7471, 3.6824, 3.6181, 3.7547, 3.6908, 3.6274, 3.5645,\n 3.5022, 3.4403, 3.3789, 3.5132, 3.6466, 3.7791, 3.9107, 3.8490,\n 3.9795, 3.9181, 3.8571, 3.9865, 3.9258, 3.8655, 3.9936, 4.1210,\n 4.0608, 4.0011, 3.9418, 3.8829, 3.8244, 3.9501, 3.8919, 4.0166,\n 3.9586, 3.9010, 3.8438, 3.9673, 3.9104, 3.8538, 3.9762, 4.0980,\n 4.0415, 3.9853, 4.1061, 4.0501, 3.9945, 3.9392, 3.8843, 3.8297,\n 3.9491, 3.8947, 3.8406, 3.9590, 3.9052, 4.0228, 4.1399, 4.0860,\n 4.0325, 3.9793, 4.0953, 4.0423, 4.1576, 4.1048, 4.0522, 4.1667,\n 4.2805, 4.2280, 4.1758, 4.2889, 4.4014, 4.5134, 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.3183, 7.1857, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.2016, 7.0973, 6.9945, 6.8931,\n 6.7931, 6.6944, 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 7.0711,\n 6.9759, 6.8819, 6.7890, 6.9282, 6.8364, 6.9743, 7.1110, 7.0201,\n 6.9303, 6.8414, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.5912, 7.7192, 7.8463, 7.9724, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.3284, 10.2486, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 10.9458, 11.0468,\n 10.9697, 10.8931, 10.9936, 10.9176, 10.8421, 10.7671, 10.8673, 10.7928,\n 10.8925, 10.8186, 10.7451, 10.8444, 10.7714, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.1164, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.1919, 11.1218, 11.2171, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What happened to his lab?\nContext: His lab was torn down in 1904, and its contents were sold two years later to satisfy a debt.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.0290, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.6859,\n -1.7270, -1.7679, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.7997, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 6.7338, 6.9310, 7.1241, 6.9282, 6.7390, 6.5561, 6.7489, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.3659, 7.5340, 7.3853, 7.2400, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.0495, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.7725, 9.8877, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.0748, 10.1865, 10.2975,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 10.0611, 10.1692, 10.2766, 10.1955, 10.1151,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.6650, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.2630, 11.3608, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.7687, 11.6949, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.9273, 12.0209, 11.9487, 12.0419, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How does the level of tuition in German private schools compare to private schools in other Western European countries?\nContext: Therefore, most Ersatzschulen have very low tuition fees and/or offer scholarships, compared to most other Western European countries.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.5832, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.9488, 1.1111, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.4664, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.6008, 1.5430, 1.4857, 1.6330,\n 1.5758, 1.7217, 1.6646, 1.8091, 1.7522, 1.6958, 1.6398, 1.7823,\n 1.9237, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.8699, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.8175, 1.7693, 1.7213, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094, 2.1004, 2.4495,\n 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712, 3.5796, 3.8497, 4.1111,\n 4.3644, 4.6101, 4.4272, 4.6663, 4.4907, 4.3217, 4.1586, 4.0012, 3.8490,\n 3.7017, 3.9337, 3.7905, 4.0166, 4.2378, 4.0980, 4.3142, 4.5260, 4.3894,\n 4.2563, 4.4634, 4.6667, 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.5569,\n 4.7488, 4.9377, 4.8177, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190,\n 5.1962, 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.6086, 5.5035, 5.4000, 5.2981, 5.4610, 5.6220, 5.5213, 5.6804, 5.8377,\n 5.9932, 5.8936, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546, 6.0587, 6.2075,\n 6.3549, 6.2601, 6.4059, 6.3122, 6.4566, 6.5997, 6.7414, 6.8819, 6.7890,\n 6.6973, 6.8364, 6.9743, 6.8834, 7.0201, 6.9303, 7.0657, 7.2001, 7.3333,\n 7.4655, 7.3765, 7.2884, 7.4194, 7.5494, 7.4622, 7.5912, 7.7192, 7.6328,\n 7.7598, 7.6742, 7.8003, 7.7155, 7.8406, 7.9649, 7.8808, 8.0042, 7.9209,\n 7.8384, 7.7567, 7.6758, 7.7981, 7.7178, 7.8393, 7.9600, 8.0798, 8.1989,\n 8.3172, 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.2772, 8.2024, 8.3162, 8.4293, 8.3550,\n 8.4674, 8.3937, 8.5054, 8.6165, 8.7270, 8.6537, 8.7636, 8.8728, 8.8000,\n 8.9086, 8.8364, 8.7646, 8.6933, 8.6226, 8.5524, 8.6603, 8.7676, 8.6978,\n 8.8045, 8.9107, 9.0164, 8.9469, 8.8780, 8.9830, 9.0876, 9.0190, 9.1230,\n 9.0549, 9.1584, 9.2613, 9.1936, 9.2960, 9.2287, 9.3306, 9.4321, 9.5331,\n 9.6336, 9.5666, 9.5000, 9.6000, 9.6996, 9.6334, 9.7325, 9.6666, 9.7653,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: For about how long would the extended LM allow a surface stay on the moon?\nContext: The Lunar Module (LM) was designed to descend from lunar orbit to land two astronauts on the Moon and take them back to orbit to rendezvous with the Command Module.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.0722, -1.1431, -0.8893, -0.9608, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -0.9802, -0.8374, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.9601, 6.8458, 7.0000, 6.8876, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.6976, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.3617, 9.2768, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.4596, 10.5642, 10.4852, 10.5893,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.3099, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.7169, 11.6441, 11.7389, 11.8333,\n 11.7611, 11.8551, 11.7833, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the population of the Dutch Republic before this emigration?\nContext: This was a huge influx as the entire population of the Dutch Republic amounted to ca.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "187", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "20.3%", + "z-score": "-1.48", + "p value": "0.93", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.3344, -1.1693, -1.2173, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.5258, 7.4316, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.6238, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.4057, 8.5249, 8.4423, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.6921, 9.6148, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 10.0701, 9.9940, 9.9184, 10.0231, 10.1273, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.1883, 11.1164, 11.0450, 10.9740, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What eventually happened to the Block I program after the incident?\nContext: Crew members would also exclusively wear modified, fire-resistant Block II space suits, and would be designated by the Block II titles, regardless of whether a LM was present on the flight or not.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.6537, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.9812, 0.9258, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.2603, 1.3957, 1.3448, 1.2943, 1.4284, 1.5617, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.4923, 1.4427, 1.5731, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.4241, 1.5492, 1.5020, 1.4551, 1.5791, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.6378, 1.5916, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.6150, 6.4550, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.2054, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.4000, 9.2717, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.4770, 10.3615, 10.4846, 10.6066,\n 10.4932, 10.3812, 10.2706, 10.3923, 10.5131, 10.6329, 10.5243, 10.6434,\n 10.7616, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.0254, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.2966, 11.4065, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.6465, 11.5515, 11.6584, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.5005, 11.6059, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 12.9527, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.5499, 13.4661, 13.5589, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.6698, 13.7612, 13.6796, 13.5985, 13.6896, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.7215, 13.8113, 13.9007, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What causes rock extension?\nContext: This is primarily accomplished through normal faulting and through the ductile stretching and thinning.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.5551, 0.7857, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.8337, 1.0070, 0.9428,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.8452, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.6222, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.4885, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.5991, 0.5548, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.0226, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 9.8877, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.7987, 10.9048, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.4614, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.7787, 11.8766, 11.7980, 11.8956, 11.8176, 11.9147, 12.0114,\n 12.1076, 12.0302, 11.9534, 12.0493, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.7017, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What lead to the UK to subscribe to the agreement on Social Policy?\nContext: Following the election of the UK Labour Party to government in 1997, the UK formally subscribed to the Agreement on Social Policy, which allowed it to be included with minor amendments as the Social Chapter of the 1997 Treaty of Amsterdam.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.4791, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.2111, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -2.8887, -2.9241, -2.9593,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -3.1009, -3.1342, -3.1674, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.0125, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.2372, 8.1481, 8.2733, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.3813, 8.5030, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.2240, 9.3380, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.9562, 10.0631, 9.9846, 9.9067, 9.8293, 9.7526, 9.6764,\n 9.6008, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 9.8736, 9.9778,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.1883, 11.1164, 11.0450, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.3120, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was the chairman of the House Committee on Energy and Commerce?\nContext: Sherwood Boehlert, chairman of the House Science Committee, said this was a \"misguided and illegitimate investigation\" apparently aimed at intimidating scientists, and at his request the U.S. National Academy of Sciences arranged for its National Research Council to set up a special investigation.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.1741, 0.0000, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.2247, -0.0896, 0.0447, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.1667,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 8.8529, 8.7600, 8.8833, 9.0057, 9.1273, 9.0354, 8.9444, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 9.9542, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.7367, 10.8423, 10.7594, 10.6771,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.8749, 10.7959, 10.7175, 10.6397, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.3378, 11.2630, 11.3608, 11.4581, 11.3837, 11.4806, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.9487, 12.0419, 11.9701, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Article 34 meant states could be responsible for what?\nContext: It also means states can be responsible for private actors.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.1711, -1.0211, -1.0659, -1.1105, -0.9623,\n -0.8151, -0.8601, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.2910, 7.1554, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.7897, 9.9146, 10.0385, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.1199, 10.0188, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.5236, 10.4263, 10.3301,\n 10.2348, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.7444, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.2593, 13.3537, 13.2690, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.5589, 13.6514, 13.7434, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.0248, 13.9427, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.7468, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where is the border of Swiss and Austria?\nContext: It is located at approximately 47\u00b039\u2032N 9\u00b019\u2032E\ufeff / \ufeff47.650\u00b0N 9.317\u00b0E\ufeff / 47.650; 9.317.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "32.0%", + "z-score": "2.26", + "p value": "0.0118", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.8838, 2.0656,\n 1.9887, 1.9127, 2.0913, 2.2678, 2.1918, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.2862, 2.2162, 2.3791, 2.3094, 2.2405, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.3552, 2.5117, 2.4444, 2.3779, 2.3120, 2.4660, 2.4004,\n 2.3354, 2.4874, 2.6381, 2.5731, 2.5087, 2.4449, 2.3817, 2.3190,\n 2.2569, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.3054, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.3529, 2.2943, 2.2361, 2.3764,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.2852, 2.2287, 2.1726, 2.3094,\n 2.2535, 2.1980, 2.3333, 2.4678, 2.4122, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.3262, 2.2723, 2.2188, 2.1656, 2.1128, 2.2436, 2.1909,\n 2.3206, 2.4495, 2.3967, 2.3443, 2.2923, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.5683, 2.5166, 2.4653, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.2132, 2.3368, 2.2871, 2.2377, 2.1886, 2.3110, 2.2620, 2.2133,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.1418, 2.0943, 2.2141, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.3094, 2.2624])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.7005, 2.8919, 3.0806, 2.9887, 3.1743, 3.3574, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730,\n 4.0415, 4.2080, 4.1192, 4.0316, 3.9452, 3.8600, 3.7758, 3.6927,\n 3.6107, 3.7732, 3.9340, 3.8523, 3.7717, 3.6920, 3.8503, 4.0069,\n 3.9276, 4.0825, 4.2359, 4.3879, 4.3086, 4.4590, 4.6079, 4.7556,\n 4.6765, 4.8226, 4.9675, 5.1111, 5.2535, 5.1745, 5.3156, 5.2372,\n 5.3769, 5.5155, 5.4377, 5.3606, 5.2842, 5.2086, 5.1338, 5.0596,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.3072,\n 5.2358, 5.3675, 5.4983, 5.6282, 5.5570, 5.6858, 5.8138, 5.9409,\n 5.8698, 5.9960, 6.1213, 6.2458, 6.3694, 6.2985, 6.4213, 6.3509,\n 6.4728, 6.4028, 6.3333, 6.2644, 6.1961, 6.1283, 6.0609, 5.9941,\n 5.9279, 6.0481, 6.1677, 6.1017, 6.0362, 5.9711, 6.0897, 6.2075,\n 6.1427, 6.2598, 6.1954, 6.1314, 6.0678, 6.0047, 6.1207, 6.2361,\n 6.3509, 6.2879, 6.2253, 6.3392, 6.4526, 6.5653, 6.6775, 6.7890,\n 6.9000, 7.0104, 7.1203, 7.0574, 7.1667, 7.2753, 7.2127, 7.3208,\n 7.2585, 7.3660, 7.4729, 7.5794, 7.5173, 7.6231, 7.7285, 7.6667,\n 7.7715, 7.7099, 7.8142, 7.9179, 8.0212, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What other location did Apollo 1 test at besides Kennedy Space Center?\nContext: They trained and conducted tests of their spacecraft at North American, and in the altitude chamber at the Kennedy Space Center.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.5855, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.1803, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.0838, 0.2089, 0.3333,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.8355, 7.6751, 7.8420, 7.6862, 7.5340, 7.3853, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 8.8389, 8.9815,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.3812, 10.5027, 10.6232, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.2314, 12.3377, 12.4434, 12.5485, 12.6529, 12.7567, 12.6557,\n 12.7590, 12.8618, 12.9639, 12.8645, 12.9662, 13.0674, 12.9692, 13.0699,\n 13.1701, 13.0732, 12.9771, 13.0771, 12.9820, 12.8877, 12.7943, 12.7017,\n 12.8017, 12.9011, 13.0000, 12.9085, 12.8179, 12.9165, 13.0146, 12.9249,\n 13.0226, 12.9337, 12.8456, 12.9430, 12.8556, 12.9527, 13.0493, 12.9628,\n 13.0590, 13.1547, 13.0690, 13.1644, 13.2593, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.4661, 13.5589, 13.4758, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.6796, 13.5985, 13.6896, 13.7803, 13.8707, 13.9606,\n 13.8804, 13.9700, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.7673, 14.8530, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How much did Capital Cities Communications purchase ABC and its properties for?\nContext: Due to an FCC ban on same-market ownership of television and radio stations by a single company (although the deal would have otherwise complied with new ownership rules implemented by the FCC in January 1985, that allowed broadcasters to own a maximum of 12 television stations), ABC and Capital Cities respectively decided to sell WXYZ-TV and Tampa independent station WFTS-TV to the E. W. Scripps Company (although Capital Cities/ABC originally intended to seek a cross-ownership waiver to retain WXYZ and Capital Cities-owned radio stations WJR and WHYT).\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -1.8593, -1.6823, -1.7303, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.7772, -1.8220, -1.8665, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.7217, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -3.1327, -2.9976, -3.0317, -2.8977, -2.9320, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.7701, -2.8043, -2.8383, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.4413, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.4501, 6.3502, 6.2517, 6.1546,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.4663, 6.6066, 6.5169, 6.4283, 6.5672,\n 6.7049, 6.6171, 6.5303, 6.4444, 6.3595, 6.4957, 6.4116, 6.3283,\n 6.4632, 6.5970, 6.7298, 6.8615, 6.9923, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.4225, 7.3402, 7.4661, 7.5910, 7.7152, 7.8384, 7.9608,\n 8.0824, 8.0006, 7.9196, 7.8393, 7.9600, 7.8803, 8.0002, 8.1192,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.5052, 8.4286, 8.5424, 8.4664, 8.3910, 8.3162, 8.2420,\n 8.3550, 8.2813, 8.3937, 8.3205, 8.4322, 8.3595, 8.4706, 8.3984,\n 8.3268, 8.2557, 8.1851, 8.2954, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.6603, 8.7676, 8.6978, 8.8045, 8.7351, 8.8413, 8.9469, 9.0520,\n 8.9830, 9.0876, 9.0190, 9.1230, 9.0549, 8.9872, 8.9199, 8.8531,\n 8.7867, 8.7207, 8.8240, 8.9268, 9.0292, 9.1310, 9.2324, 9.1667,\n 9.2676, 9.2022, 9.3026, 9.4026, 9.5021, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who painted the retable of St. George that is in the V&A collection?\nContext: One of the largest objects in the collection is the Spanish tempera on wood, 670 x 486 cm, retable of St George, c. 1400, consisting of numerous scenes and painted by Andr\u00e9s Marzal De Sax in Valencia.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.5170, -0.5717, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.3615, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.4845, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.3211, 5.4909, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.4006, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.6973, 6.6066, 6.7456, 6.8834, 6.7937,\n 6.9303, 6.8414, 6.7536, 6.6667, 6.8019, 6.7159, 6.8500, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.6471, 6.5653, 6.6968, 6.8274, 6.9570,\n 7.0857, 7.2134, 7.3402, 7.2587, 7.3845, 7.3037, 7.2236, 7.3485,\n 7.4724, 7.5955, 7.7178, 7.6383, 7.7597, 7.6808, 7.8014, 7.7232,\n 7.8429, 7.9619, 8.0801, 8.1976, 8.3143, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.9221, 8.8448, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.1357, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.1846, 10.2872, 10.2132, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.5278, 10.6271, 10.7258,\n 10.8241, 10.9220, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.4525, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: According to a 1955 review, what were savings by the wealthy thought to offset?\nContext: According to a 1955 review, savings by the wealthy, if these increase with inequality, were thought to offset reduced consumer demand.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -1.8677, -1.9189, -1.9695,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.3619, -2.4045, -2.4467, -2.2813, -2.3238, -2.3660, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.7325, -2.7714, -2.8101, -2.6554, -2.6943,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.7735, -2.8107, -2.8478, -2.7014, -2.7386])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 7.0200, 7.2296, 6.9293, 6.6469, 6.3805, 6.5997,\n 6.3509, 6.5672, 6.3333, 6.5465, 6.7543, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 7.3131, 7.4983, 7.6800, 7.4885, 7.6681,\n 7.4839, 7.6615, 7.8360, 8.0076, 7.8320, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.9443, 8.7967, 8.6522, 8.8015, 8.9489,\n 9.0947, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.6612, 9.7980,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.7179, 10.5862,\n 10.7131, 10.8388, 10.9634, 11.0870, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 11.7978, 11.6743, 11.7907, 11.9062, 11.7851,\n 11.9001, 11.7809, 11.6632, 11.7779, 11.8918, 11.7762, 11.8896, 12.0021,\n 12.1139, 12.2248, 12.3350, 12.2222, 12.1107, 12.2207, 12.3299, 12.4384,\n 12.3289, 12.2207, 12.3289, 12.4365, 12.3299, 12.4370, 12.3317, 12.2275,\n 12.3343, 12.4405, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.4567, 12.5604, 12.6635, 12.5657, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.7812, 12.6867, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.8017, 12.7100, 12.8095, 12.9085, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.2166, 13.1279, 13.2243, 13.3201, 13.2324, 13.1453,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.4758, 13.5683, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.5069, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.7106, 13.6313, 13.7215, 13.8113, 13.7327, 13.8222, 13.7442, 13.6667,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.9446, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What percentage of Warsaw's population was Protestant in 1901?\nContext: After the war, the new communist authorities of Poland discouraged church construction and only a small number were rebuilt.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140, 4.5033, 4.7556, 5.0000,\n 4.8008, 5.0389, 4.8488, 5.0811, 4.8990, 5.1257, 4.9507, 5.1723, 5.3886,\n 5.2204, 5.4322, 5.6395, 5.4772, 5.3199, 5.1671, 5.3708, 5.5705, 5.7664,\n 5.9588, 6.1477, 6.3333, 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568,\n 6.6172, 6.7893, 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738,\n 6.6395, 6.8031, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132, 6.8641,\n 6.7583, 6.6541, 6.5514, 6.4501, 6.3502, 6.4993, 6.4008, 6.5483, 6.4510,\n 6.3549, 6.2601, 6.4059, 6.3122, 6.4566, 6.3640, 6.2725, 6.4153, 6.3248,\n 6.2354, 6.1470, 6.2883, 6.2008, 6.3408, 6.2541, 6.1685, 6.0838, 6.2222,\n 6.3595, 6.4957, 6.4116, 6.3283, 6.4632, 6.5970, 6.5144, 6.6471, 6.5653,\n 6.6968, 6.8274, 6.7462, 6.8757, 7.0043, 7.1319, 7.0513, 6.9714, 7.0980,\n 7.2236, 7.1443, 7.2691, 7.1904, 7.3143, 7.2363, 7.3592, 7.2818, 7.4039,\n 7.3271, 7.4483, 7.3721, 7.2966, 7.4168, 7.5364, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.5495, 7.4762, 7.4034, 7.5204, 7.4482, 7.5644, 7.4927, 7.4215,\n 7.5369, 7.4662, 7.3960, 7.3263, 7.4409, 7.3717, 7.4855, 7.4168, 7.3485,\n 7.2807, 7.2134, 7.3263, 7.4386, 7.5504, 7.6615, 7.5944, 7.5277, 7.6381,\n 7.7480, 7.6816, 7.7908, 7.7249, 7.8335, 7.9415, 7.8759, 7.9833, 7.9181,\n 7.8533, 7.7889, 7.7249, 7.8316, 7.9377, 7.8740, 7.9796, 7.9162, 8.0212,\n 7.9582, 8.0627, 8.0000, 8.1039, 8.0416, 8.1449, 8.0829, 8.0212, 8.1240,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: For which show did Billie Piper tape an introduction?\nContext: CBC began airing series two on 9 October 2006 at 20:00 E/P (20:30 in Newfoundland and Labrador), shortly after that day's CFL double header on Thanksgiving in most of the country.[citation needed]\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 1.0719, 1.0070, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.4517, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.5303, 1.4792, 1.4284, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.5731, 1.7028, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.5159, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.1111, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 5.9628, 5.8398, 6.0125, 6.1828, 6.0622,\n 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.3485, 7.2532, 7.3901, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.0000, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.4423, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.7405,\n 9.8486, 9.7701, 9.8776, 9.7997, 9.7224, 9.6456, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.0668, 10.1690, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.3280, 10.2565, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.7090, 10.6389, 10.5692, 10.5000,\n 10.4312, 10.5286, 10.6256, 10.5573, 10.4893, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the seldom used force unit equal to one thousand newtons?\nContext: The kilogram-force leads to an alternate, but rarely used unit of mass: the metric slug (sometimes mug or hyl) is that mass that accelerates at 1 m\u00b7s\u22122 when subjected to a force of 1 kgf.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.9507, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.2796, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.2623, 11.3740, 11.4849, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.4065, 11.5157, 11.4184, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 11.8704, 11.9754, 11.8818,\n 11.9863, 12.0902, 11.9977, 12.1012, 12.2040, 12.1125, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.7279, 12.8267, 12.9249,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.6117, 13.7054, 13.7986, 13.8914, 13.8051, 13.8976,\n 13.9896, 13.9042, 13.8193, 13.9111, 13.8270, 13.9185, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.2046, 14.2939, 14.2121, 14.3011,\n 14.3897, 14.3087, 14.3970, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the name of the turf used in Levi's Stadium for the Super Bowl?\nContext: Concerns were raised over whether Levi's Stadium's field was of a high enough quality to host a Super Bowl; during the inaugural season, the field had to be re-sodded multiple times due to various issues, and during a week 6 game earlier in the 2015 season, a portion of the turf collapsed under Baltimore Ravens kicker Justin Tucker, causing him to slip and miss a field goal, although the field has not had any major issues since.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.3698, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.3740, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.0226, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 7.9196, 7.8000, 7.9472, 8.0928, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.0495, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.4346, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.2143, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 10.1243, 10.0389, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.5002, 10.4164, 10.3333, 10.2509, 10.1692, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.1695, 10.0910, 10.1968, 10.3020, 10.2242,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.4097, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.4909, 10.4170, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.5410, 10.6389, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Which company is the gallery of Japanese art named after?\nContext: The museum also holds some cloisonn\u00e9 pieces from the Japanese art production company, Ando Cloisonn\u00e9.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "29.4%", + "z-score": "1.05", + "p value": "0.147", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 1.1111, 1.0507])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.7093,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.5191, 9.6470, 9.7738, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.3473,\n 11.2427, 11.3555, 11.4675, 11.5788, 11.4759, 11.5866, 11.4849, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.8392,\n 11.9457, 11.8491, 11.7533, 11.6584, 11.7647, 11.6709, 11.5779, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.8151, 11.7249, 11.6356, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 12.1622, 12.2628, 12.1756,\n 12.0891, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.0688, 13.1617, 13.0821, 13.0030, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.4871, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.6155, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the name of the imperialistic policy in China?\nContext: The Age of Imperialism, a time period beginning around 1700, saw (generally European) industrializing nations engaging in the process of colonizing, influencing, and annexing other parts of the world in order to gain political power.[citation needed]\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.3957, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.1794, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.1921, -1.2326, -1.2730, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.0612, -0.9313, -0.9711, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.1291, 8.0370, 8.1651, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.6225,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.5366, 10.6397, 10.7423, 10.8443, 10.9458, 11.0468,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.0904, 11.0165, 10.9431, 10.8702, 10.9685, 10.8961,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When was Luther's last sermon?\nContext: It was \"entirely devoted to the obdurate Jews, whom it was a matter of great urgency to expel from all German territory,\" according to L\u00e9on Poliakov.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.2158, 0.1721, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.2089, 0.1667,\n 0.1247, 0.2487, 0.2067, 0.3299, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.7382, 5.8936, 6.0474, 6.1996, 6.1012, 6.0041, 5.9084,\n 6.0587, 5.9641, 6.1128, 6.2601, 6.4059, 6.5504, 6.4566, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.3928, 6.5303, 6.6667, 6.5807, 6.4957, 6.6308, 6.7648,\n 6.8977, 7.0296, 6.9451, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.0403, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.9660, 8.8874, 8.8095, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.7574, 9.6850, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.1262, 10.0547, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who did the BBC work with to reconstruct some of The Invasion episodes?\nContext: The BBC, in conjunction with animation studio Cosgrove Hall, reconstructed the missing episodes 1 and 4 of The Invasion (1968), using remastered audio tracks and the comprehensive stage notes for the original filming, for the serial's DVD release in November 2006.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.2780,\n -1.3195, -1.1794, -1.0401, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.9620, 3.8297, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 7.1393,\n 7.0379, 7.1813, 7.0812, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.4316, 7.3386, 7.2466,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.2443, 7.3765, 7.2884, 7.2012,\n 7.1149, 7.0296, 6.9451, 7.0759, 6.9923, 7.1220, 7.2508, 7.1678,\n 7.0857, 7.2134, 7.3402, 7.4661, 7.3845, 7.3037, 7.4286, 7.5526,\n 7.6758, 7.7981, 7.9196, 8.0403, 7.9600, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.2760, 8.1976, 8.1198, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.9567, 8.8800, 8.9912,\n 8.9151, 9.0257, 8.9502, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.4837, 9.4103, 9.3374, 9.2651,\n 9.1932, 9.1218, 9.2276, 9.3328, 9.2619, 9.3665, 9.4707, 9.4002,\n 9.3302, 9.2607, 9.1916, 9.1230, 9.2265, 9.1584, 9.2613, 9.1936,\n 9.2960, 9.2287, 9.3306, 9.2637, 9.3651, 9.2986, 9.3995, 9.3333,\n 9.4338, 9.3680, 9.4680, 9.4026, 9.5021, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When were Luther and his prospective bride engaged?\nContext: The ceremonial walk to the church and the wedding banquet were left out, and were made up two weeks later on 27 June.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "23.2%", + "z-score": "-0.465", + "p value": "0.679", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.4770, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.3152, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.6678, 8.8007, 8.6942, 8.8260, 8.9567,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 11.1480, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.4261, 11.5311, 11.4425, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.7073, 12.6234, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 12.9621, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.7559, 13.8447, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where in South Carolina did Huguenot nobility settle?\nContext: He became pastor of the first Huguenot church in North America in that city.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.7509, 0.8978, 0.8447, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 1.0235, 0.9739, 1.1094, 1.0598, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 0.9382, 0.8909,\n 1.0215, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.2733, 8.3976, 8.3093, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.5607, 8.6783, 8.7952, 8.9113,\n 8.8294, 8.7482, 8.8636, 8.7831, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.8874, 9.0000, 8.9221, 8.8448, 8.9567, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.3113,\n 9.2368, 9.3443, 9.2704, 9.3774, 9.3040, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.4375, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.7690, 9.7011, 9.6336, 9.7337, 9.8333,\n 9.9325, 9.8654, 9.7987, 9.8974, 9.8311, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is colonialism's core meaning?\nContext: Colonialism is seen to be the architect deciding how to start dominating areas and then imperialism can be seen as creating the idea behind conquest cooperating with colonialism.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "23.3%", + "z-score": "-0.338", + "p value": "0.632", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -1.9630, -2.0381, -2.1111, -1.7457, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.3379])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "60.0%", + "z-score": "10.2", + "p value": "7.73e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 5.4611, 5.7155, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 7.9398, 7.7232, 7.9048, 7.6980, 7.4983, 7.6800, 7.4885, 7.3030,\n 7.4839, 7.3051, 7.1317, 6.9631, 6.7992, 6.6398, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.4312, 6.5991, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.9861, 8.8889, 8.7927, 8.6976, 8.8227, 8.9469,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.0370, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.4763, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.7908, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was Warsaw's population in 1901?\nContext: After the war, the new communist authorities of Poland discouraged church construction and only a small number were rebuilt.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 0.8520, 1.0289, 0.9631, 0.8980, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.4580, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.6222, 1.5613, 1.5010, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.5430, 1.6906, 1.8371,\n 1.9825, 2.1268, 2.0682, 2.0101, 1.9524, 1.8953, 2.0373, 1.9803,\n 2.1210, 2.0642, 2.0078, 2.1470, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.1980, 2.1429, 2.0881, 2.0338, 2.1685, 2.3022, 2.4351,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.3500, 2.4803, 2.4267, 2.5560,\n 2.5026, 2.4495, 2.5776, 2.7050, 2.6519, 2.5990, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.4653, 2.5898, 2.7137, 2.8368, 2.9593,\n 2.9076, 2.8561, 2.8050, 2.7541, 2.8752, 2.8245, 2.9448, 2.8943,\n 2.8440, 2.9633, 3.0821, 3.0317, 2.9817, 2.9320, 2.8825, 3.0000,\n 2.9507, 2.9016, 2.8528, 2.9692, 3.0851, 3.2004, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.0632, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.0171, 8.1550, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 8.7757, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.0354, 9.1561, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.6306, 10.5475, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.5393, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.1018, 11.0235, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.3608, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.7833, 11.8769, 11.8056, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: By what name was the Mongol army that finally conquered Bulgaria known?\nContext: The famous cavalry expedition led by Subutai and Jebe, in which they encircled the entire Caspian Sea defeating all armies in their path, remains unparalleled to this day, and word of the Mongol triumphs began to trickle to other nations, particularly Europe.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.7293, 0.8721, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.8645, 0.8165, 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.8607, 0.9870, 0.9415, 0.8963, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "58.1%", + "z-score": "10.7", + "p value": "2.96e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.0779, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.4610, 5.6220, 5.7812, 5.9386,\n 5.8377, 5.9932, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 6.7886, 6.6935, 6.8354,\n 6.7414, 6.8819, 6.7890, 6.9282, 7.0662, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.0231, 6.9361, 7.0692, 6.9830,\n 6.8977, 6.8133, 6.9451, 7.0759, 7.2058, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.4225, 7.3402, 7.4661, 7.3845, 7.5094, 7.4286, 7.3485,\n 7.2691, 7.1904, 7.3143, 7.4373, 7.5595, 7.6808, 7.8014, 7.7232,\n 7.8429, 7.7653, 7.8842, 8.0024, 7.9253, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.3140, 8.2381, 8.3526, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.2450, 9.3537, 9.2782, 9.3863, 9.3113,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.1242, 9.0518, 8.9800, 9.0869,\n 9.0155, 9.1218, 9.0510, 9.1567, 9.2619, 9.1915, 9.1215, 9.2261,\n 9.1566, 9.2607, 9.3642, 9.4673, 9.5698, 9.6719, 9.6028, 9.7043,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.0371, 10.1363, 10.2350, 10.1667,\n 10.2650, 10.3628, 10.4603, 10.5573, 10.6538, 10.7500])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What university donated the land for the Manned Spacecraft Center?\nContext: A site was chosen in Houston, Texas, on land donated by Rice University, and Administrator Webb announced the conversion on September 19, 1961.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "97", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "26.8%", + "z-score": "0.41", + "p value": "0.341", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.2563, 4.1265, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284, 3.8146, 3.7033,\n 3.9001, 4.0937, 3.9837, 3.8759, 4.0657, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.5630, 5.7242, 5.8835, 5.7812, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.3625, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.6238, 8.5381, 8.6581, 8.7773,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.7952, 8.9113,\n 9.0267, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.2697, 9.1905, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.3212, 9.4299, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.7380, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.3154, 10.2419, 10.3435, 10.2706,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.9220, 10.8505, 10.7795, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.0961, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was given as the cause of death?\nContext: His body was later found by maid Alice Monaghan after she had entered Tesla's room, ignoring the \"do not disturb\" sign that Tesla had placed on his door two days earlier.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "25.0%", + "z-score": "0", + "p value": "0.5", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.0751, 5.9186, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.2146, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.5632, 10.6733, 10.7828, 10.6936,\n 10.8025, 10.7141, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.2674, 11.1810, 11.0952, 11.2001, 11.3043, 11.2194, 11.1352, 11.2390,\n 11.1556, 11.0728, 10.9906, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 12.0532, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.3163, 12.4109, 12.5049, 12.4283,\n 12.5221, 12.4460, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.7756, 12.8667, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How many plant species make up the total in the rainforest?\nContext: One in five of all the bird species in the world live in the rainforests of the Amazon, and one in five of the fish species live in Amazonian rivers and streams.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "10.6%", + "z-score": "-4.71", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.2733,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.5280, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.8837, -3.9181, -3.7619, -3.7966, -3.8312, -3.8655, -3.8997, -3.9337,\n -3.9675, -3.8150, -3.8490, -3.8829, -3.9166, -3.9501, -3.9835, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -4.1303, -4.1624, -4.1944, -4.2262, -4.2578, -4.2893, -4.1461, -4.1779,\n -4.2094, -4.2409, -4.2722, -4.3033, -4.3343, -4.3652, -4.3959, -4.4265,\n -4.4570, -4.4873, -4.5175, -4.5476, -4.5776, -4.6074, -4.6371, -4.6667,\n -4.6961, -4.7255, -4.5893, -4.6188, -4.6482, -4.6775, -4.7066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "58.2%", + "z-score": "10.7", + "p value": "4e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.1107, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 8.0934, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 9.0323, 8.9314, 8.8318,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.7927, 8.6976, 8.6035, 8.7287,\n 8.6357, 8.7600, 8.6679, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.0389, 9.9542, 10.0647, 10.1745, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.1429, 10.0611, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.8515, 10.9545,\n 10.8749, 10.9773, 10.8984, 10.8200, 10.7423, 10.6650, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.5833, 10.5128, 10.6111, 10.7090, 10.6389, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.7910, 10.7222])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What Newcastle radio station is based at the Great North Children's Hospital?\nContext: Newcastle Student Radio is run by students from both of the city's universities, broadcasting from Newcastle University's student's union building during term time.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.7154, -1.7614, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.3833, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.5492, -1.5878, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321, 2.1004, 2.4495,\n 2.7815, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 3.3968, 3.6667,\n 3.9279, 3.7524, 3.5839, 3.4219, 3.2660, 3.5176, 3.3665, 3.6108, 3.4641,\n 3.7017, 3.5590, 3.4207, 3.6515, 3.5165, 3.7417, 3.6098, 3.8297, 4.0451,\n 3.9158, 4.1265, 4.3333, 4.5363, 4.4091, 4.6082, 4.4836, 4.3618, 4.5569,\n 4.7488, 4.9377, 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 4.8394, 5.0190,\n 4.9075, 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.2981, 5.4610, 5.6220, 5.7812, 5.6804, 5.8377,\n 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008, 6.3035, 6.2075,\n 6.1128, 6.0193, 6.1664, 6.0740, 6.2197, 6.1283, 6.2725, 6.1820, 6.0927,\n 6.2354, 6.1470, 6.2883, 6.2008, 6.3408, 6.4795, 6.3928, 6.5303, 6.6667,\n 6.8019, 6.7159, 6.8500, 6.7648, 6.6804, 6.8133, 6.9451, 7.0759, 7.2058,\n 7.3346, 7.2508, 7.1678, 7.0857, 7.0043, 7.1319, 7.0513, 7.1779, 7.0980,\n 7.2236, 7.1443, 7.0658, 7.1904, 7.1125, 7.2363, 7.1590, 7.2818, 7.4039,\n 7.3271, 7.4483, 7.5687, 7.6883, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.0880, 8.0139, 7.9403, 7.8673, 7.9816,\n 7.9091, 8.0227, 7.9507, 8.0636, 7.9921, 7.9211, 8.0333, 7.9628, 8.0742,\n 8.0042, 8.1150, 8.2252, 8.1556, 8.2652, 8.3742, 8.4826, 8.4133, 8.5212,\n 8.4523, 8.3840, 8.4911, 8.5978, 8.7039, 8.8094, 8.9145, 8.8464, 8.7788,\n 8.7116, 8.6448, 8.7492, 8.6828, 8.7867, 8.7207, 8.8240, 8.7584, 8.6932,\n 8.7959, 8.7311, 8.8333, 8.7689, 8.8706, 8.9718, 8.9077, 9.0085, 9.1088,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: the Great Internet Mersenne Prime Search, what was the prize for finding a prime with at least 10 million digits?\nContext: In 2009, the Great Internet Mersenne Prime Search project was awarded a US$100,000 prize for first discovering a prime with at least 10 million digits.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -0.9238, -0.9766, -1.0290, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 5.9604, 6.1968, 6.4254, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.5465, 6.7543, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 7.1241, 6.9282, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.0000,\n 7.8355, 8.0017, 8.1654, 8.0064, 8.1684, 8.0139, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.6702, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 9.8271, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.9754, 9.8590, 9.7442, 9.8716, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.1419, 12.2474,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.3655, 12.4689, 12.3729, 12.2778,\n 12.3809, 12.2868, 12.3895, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.0000, 12.9085, 13.0071, 13.1050, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.0311, 13.1279, 13.2243, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.6117, 13.7054, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 14.1725, 14.2633, 14.3537, 14.4437, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.7152, 14.6313, 14.5479, 14.6362, 14.5535, 14.6416,\n 14.7293, 14.8167, 14.7348, 14.8219, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: When was the St. Bartholomew's Day Massacre?\nContext: In what became known as the St. Bartholomew's Day Massacre of 24 August \u2013 3 October 1572, Catholics killed thousands of Huguenots in Paris.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -1.8571, -1.7085, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -1.9985, -2.0369, -2.0751, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -1.9311, -1.7974, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -2.0726, -1.9432, -1.9795, -2.0156, -2.0515, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 8.7986, 8.9355, 8.8192, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.6061, 10.7211, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.2564, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.3944, 11.5005, 11.6059, 11.5156, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.1492, 12.2503, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.7199, 12.6367, 12.7329, 12.8285,\n 12.9238, 12.8414, 12.9363, 13.0307, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.5179, 13.6091, 13.5292, 13.4499,\n 13.5408, 13.6313, 13.5526, 13.6429, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.0214, 14.1091, 14.0324, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What do these teachers NOT do?\nContext: These teachers do not teach by rote but attempt to find new invigoration for the course materials on a daily basis.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.0155, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -0.8513,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "166", + "Fraction of T in Greenlist": "83.4%", + "z-score": "19", + "p value": "4.71e-81", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.1968, 6.4254, 6.6469, 6.8620, 6.5997,\n 6.8127, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.0520, 9.2091, 9.3638, 9.5163, 9.3333,\n 9.4850, 9.3081, 9.4589, 9.6077, 9.7545, 9.8995, 10.0426, 10.1840,\n 10.3237, 10.4618, 10.2976, 10.4350, 10.5709, 10.7052, 10.8382, 10.9697,\n 11.0998, 11.2286, 11.3561, 11.2022, 11.3293, 11.4551, 11.5797, 11.4310,\n 11.5551, 11.4097, 11.5333, 11.6559, 11.7773, 11.8977, 12.0170, 12.1353,\n 12.2527, 12.3690, 12.2309, 12.3468, 12.4619, 12.5760, 12.6892, 12.8015,\n 12.9130, 13.0236, 13.1334, 13.0017, 13.1112, 13.2199, 13.3279, 13.1993,\n 13.3070, 13.1806, 13.2879, 13.3945, 13.5004, 13.6056, 13.7100, 13.8138,\n 13.9169, 14.0193, 13.8978, 14.0000, 14.1015, 14.2024, 14.3027, 14.4024,\n 14.5014, 14.5999, 14.6978, 14.5807, 14.6784, 14.7755, 14.8721, 14.7573,\n 14.8536, 14.7404, 14.8365, 14.9321, 15.0272, 15.1217, 15.2158, 15.3093,\n 15.4024, 15.4949, 15.3852, 15.4776, 15.5695, 15.6609, 15.7519, 15.8424,\n 15.9324, 16.0220, 16.1112, 16.0048, 16.0938, 16.1824, 16.2705, 16.1658,\n 16.2538, 16.1503, 16.2381, 16.3255, 16.4125, 16.4992, 16.5854, 16.6712,\n 16.7567, 16.8418, 16.7410, 16.8259, 16.9105, 16.9947, 17.0785, 17.1620,\n 17.2451, 17.3279, 17.4103, 17.3121, 17.3944, 17.4763, 17.5579, 17.4611,\n 17.5426, 17.4466, 17.5280, 17.6090, 17.6897, 17.7701, 17.8502, 17.9300,\n 18.0095, 18.0886, 17.9949, 18.0739, 18.1527, 18.2311, 18.3093, 18.3871,\n 18.4647, 18.5420, 18.6190, 18.5273, 18.6043, 18.6809, 18.7572, 18.6667,\n 18.7429, 18.6531, 18.7292, 18.8051, 18.8807, 18.9561, 19.0312])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What role in economics did the university play a major part in?\nContext: In economics, the university has played an important role in shaping ideas about the free market and is the namesake of the Chicago school of economics, the school of economic thought supported by Milton Friedman and other economists.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 1.0445, 1.2910, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.8980, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.3590, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.3651,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.1684, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "168", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "73.2%", + "z-score": "14.4", + "p value": "1.62e-47", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.4915, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.2998, 3.5032, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.5777, 3.7700, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.8669, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 5.9874, 6.1450, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.9690, 14.0619, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.4321])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What is the estimate of how many physicians give out drugs on their own?\nContext: 7 to 10 percent of American physicians practices reportedly dispense drugs on their own.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "146", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.58", + "p value": "0.00494", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.1412, 2.3570, 2.2576, 2.1602,\n 2.3706, 2.5775, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.5820,\n 2.5019, 2.6778, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.7080,\n 2.6316, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.3570,\n 2.5207, 2.6828, 2.6112, 2.5403, 2.7001, 2.6296, 2.5600, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.6667, 2.5991, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.6381, 2.5731, 2.7222, 2.6575, 2.5934, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.4887, 2.6336, 2.5717, 2.5103, 2.6536,\n 2.5925, 2.5318, 2.6735, 2.6131, 2.5532, 2.4938, 2.6336, 2.5744,\n 2.7129, 2.6540, 2.5954, 2.5373, 2.4797, 2.6163, 2.5589, 2.6943,\n 2.6370, 2.5802])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 7.1241, 7.3131, 7.4983, 7.3054, 7.4885, 7.3030,\n 7.4839, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.9672, 9.1201, 8.9567, 8.7970, 8.9496,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.0924, 9.2388, 9.0924, 8.9489,\n 9.0947, 9.2387, 9.0990, 8.9618, 8.8271, 8.6948, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.3721, 9.2600, 9.3901, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.6251, 9.5294, 9.6490, 9.7678, 9.6732, 9.5795, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.5400, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.4185, 9.5338, 9.6484, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.4513, 9.3686, 9.4812, 9.5931, 9.5112, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.4124, 9.3338, 9.4438, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.4299, 9.5381, 9.4619, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.7380, 9.8433, 9.9481, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.1981, 10.1262, 10.0547, 10.1558, 10.0848, 10.1855, 10.2856, 10.2151,\n 10.1450, 10.0753, 10.0061, 9.9374, 10.0371, 10.1363, 10.0679, 10.0000,\n 9.9325, 9.8654, 9.7987, 9.7325, 9.6666, 9.6011, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was Warsaw ranked the 7th greatest of?\nContext: It also has one of the fastest growing economies, with GDP growth at 6.5 percent in 2007 and 6.1 percent in the first quarter of 2008.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.7285, 1.6348, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.5635, 1.7634, 1.6803, 1.5986, 1.7942, 1.7130, 1.9052,\n 2.0948, 2.2819, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.2453, 2.4228, 2.5983, 2.5198, 2.6928, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.3126, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 2.1213,\n 2.0517, 2.2162, 2.1470, 2.0785, 2.0107, 1.9437, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 2.0276, 2.1822,\n 2.1182, 2.0548, 1.9920, 2.1442, 2.0817, 2.0197, 1.9582, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.0739, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.8664, 1.8091, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.8676, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.1513, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.7498, 1.7025,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.9068, 1.8605, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 9.9980, 10.1234, 10.0107, 10.1352,\n 10.0242, 9.9146, 10.0385, 9.9304, 10.0535, 9.9469, 10.0692, 9.9640,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.9085, 13.0071, 12.9165, 13.0146, 13.1122,\n 13.0226, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.6188, 13.7122, 13.8051, 13.7194,\n 13.6343, 13.7270, 13.8193, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.3700, 14.4591, 14.5479, 14.6362, 14.5535, 14.4714,\n 14.5595, 14.6473, 14.5659, 14.4850, 14.4046, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.4294, 14.5162, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was the Marburg Colloquy meant to establish?\nContext: Agreement was achieved on fourteen points out of fifteen, the exception being the nature of the Eucharist \u2013 the sacrament of the Lord's Supper\u2014an issue crucial to Luther.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.9869, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.5119, 1.4403, 1.6187, 1.5475, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.8165, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 5.8889, 6.1101, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.9186, 6.1107, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.6395,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.0652,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.5848,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.6130, 12.7077,\n 12.8019, 12.8957, 12.9891, 13.0821, 13.0030, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.4871, 13.5771, 13.6667,\n 13.7559, 13.8447, 13.9332, 14.0214, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What river runs through Warsaw?\nContext: Its population is estimated at 1.740 million residents within a greater metropolitan area of 2.666 million residents, which makes Warsaw the 9th most-populous capital city in the European Union.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "34.9%", + "z-score": "2.09", + "p value": "0.0183", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.7408, 1.9795, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 2.1602,\n 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 2.1997, 2.1094, 2.3094,\n 2.2200, 2.1320, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.2819, 2.2000, 2.3842, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.2453, 2.1678, 2.0913])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.0401, 8.9324, 8.8260, 8.9567,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.4101, 11.5156, 11.6206, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.7543, 11.8571, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.3455, 12.2627, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.1520, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who designed the Scottish Parliament building?\nContext: Since September 2004, the official home of the Scottish Parliament has been a new Scottish Parliament Building, in the Holyrood area of Edinburgh.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.8489, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.9959, 2.1997, 2.1094, 2.3094,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.7107, 2.8983, 2.8093, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.9140, 2.8292, 3.0071, 2.9231, 3.0984,\n 3.2717, 3.1879, 3.3587, 3.2757, 3.1937, 3.1129, 3.2806, 3.2004,\n 3.3659, 3.2863, 3.4498, 3.6116, 3.5322, 3.6920, 3.6133, 3.5355,\n 3.4586, 3.6159, 3.5396, 3.6950, 3.6193, 3.7730, 3.9253, 3.8497,\n 4.0004, 3.9254, 3.8512, 3.7778, 3.7051, 3.8534, 4.0004, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.5762, 3.5079, 3.4402, 3.3731,\n 3.3066, 3.2408, 3.1755, 3.1109, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.6735, 2.6131, 2.5532, 2.4938, 2.4348, 2.3764,\n 2.3183, 2.2608, 2.2037, 2.1470, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.3659, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 8.2054, 8.3589, 8.2195, 8.3716,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 9.2620, 9.1333, 9.2717, 9.4087, 9.5443, 9.4188, 9.5534,\n 9.6867, 9.5637, 9.6960, 9.8271, 9.7065, 9.8367, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.9794, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.6894, 11.7992, 11.9083, 11.8058,\n 11.9144, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.4567, 12.5604, 12.6635, 12.5657, 12.6684, 12.7704, 12.6739,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 13.0815, 13.1806, 13.2791,\n 13.3770, 13.4745, 13.3810, 13.4780, 13.5746, 13.4822, 13.5784, 13.6742,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.7772, 13.8713, 13.9650, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.2464, 14.3380, 14.4292, 14.3412, 14.4321,\n 14.5226, 14.4355, 14.5257, 14.6155, 14.7049, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.0629, 15.1498, 15.2364, 15.1524,\n 15.2387, 15.3247, 15.2414, 15.3272, 15.4126, 15.4976, 15.4152, 15.5000,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What portion of Berlin's population spoke French by 1700?\nContext: By 1700, one-fifth of the city's population was French speaking.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.6239, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.6524, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.8978, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.3128, 1.2597, 1.2070, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.9055, 2.7852, 3.0123, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.1027, 2.9913, 2.8823, 3.0929, 2.9856, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.5642, 3.7528,\n 3.9386, 3.8376, 4.0205, 3.9208, 3.8228, 3.7264, 3.6315, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.7087, 3.8819, 4.0531, 4.2222, 4.1312,\n 4.0415, 4.2080, 4.3727, 4.2836, 4.1957, 4.1090, 4.2710, 4.4313,\n 4.5899, 4.5035, 4.4182, 4.5747, 4.7296, 4.6448, 4.5611, 4.7140,\n 4.6311, 4.7823, 4.9322, 4.8497, 4.9980, 4.9163, 4.8355, 4.7556,\n 4.6765, 4.8226, 4.7442, 4.8889, 4.8111, 4.7341, 4.8772, 5.0190,\n 4.9424, 4.8666, 4.7916, 4.9317, 5.0707, 4.9960, 4.9221, 4.8488,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.0325, 5.1650, 5.0948, 5.2262, 5.3567, 5.2868, 5.4163, 5.3468,\n 5.2779, 5.2096, 5.1419, 5.2699, 5.2025, 5.3295, 5.2626, 5.1962,\n 5.1303, 5.2560, 5.1905, 5.1255, 5.0609, 5.1854, 5.3092, 5.2449,\n 5.1810, 5.1177, 5.2402, 5.3621, 5.4832, 5.4199, 5.3571, 5.4772,\n 5.5967, 5.5340, 5.6527, 5.7707, 5.8880, 5.8254, 5.9420, 6.0579,\n 5.9956, 6.1107, 6.0487, 5.9871, 5.9258, 5.8650, 5.9792, 5.9186,\n 6.0321, 5.9718, 5.9120, 6.0246, 6.1367, 6.0770, 6.1884, 6.2993,\n 6.2398, 6.3500, 6.4597, 6.4004, 6.3414, 6.2828, 6.3917, 6.5000,\n 6.6078, 6.5493, 6.4912, 6.5983, 6.7049, 6.6469, 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Who was responsible for the new building projects in Jacksonville?\nContext: Mayor W. Haydon Burns' Jacksonville Story resulted in the construction of a new city hall, civic auditorium, public library and other projects that created a dynamic sense of civic pride.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.3443, 1.2956, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.3607, 1.3131, 1.2657, 1.3926,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.4699, 1.4241, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.5298, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.4878, 7.6613, 7.4952, 7.3333,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.5010, 8.6522, 8.8015, 8.6603,\n 8.8082, 8.9544, 9.0990, 9.2418, 9.3831, 9.5229, 9.3871, 9.5258,\n 9.6630, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.0698,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.4608, 10.5859,\n 10.7098, 10.5893, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.0851, 11.2025, 11.3189, 11.4345, 11.3228,\n 11.4378, 11.5519, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.0044, 12.1136, 12.2221, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.5460, 12.4434, 12.5485, 12.6529, 12.7567, 12.8598,\n 12.9624, 12.8618, 12.9639, 13.0655, 12.9662, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.0732, 13.1730, 13.2722, 13.3710, 13.4691, 13.3737, 13.4715,\n 13.5688, 13.6656, 13.5714, 13.6679, 13.7638, 13.6707, 13.7663, 13.8615,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.1458, 14.2388, 14.3313, 14.2408,\n 14.3330, 14.4248, 14.5161, 14.6070, 14.6976, 14.6084, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.8789, 14.9677, 14.8804, 14.9689, 15.0570, 14.9707,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.0629, 15.1498, 15.2364, 15.3226,\n 15.4085, 15.3247, 15.4103, 15.4956, 15.5805, 15.4976, 15.5823, 15.6667,\n 15.5845, 15.6686, 15.7524, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What did Luther explain about acquiring God's grace?\nContext: In this work, one of his most emphatic statements on faith, he argued that every good work designed to attract God's favor is a sin.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.6868, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 7.8444, 7.9849, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.7482, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.0021,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.5475, 10.4652, 10.3835, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.5393, 10.6439, 10.5642, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.6397, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.6082, 11.5329, 11.6297, 11.5549, 11.6514, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.7901, 11.8849, 11.9792, 11.9060, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.0419, 12.1347, 12.0630, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Where was war fought?\nContext: The war was fought primarily along the frontiers between New France and the British colonies, from Virginia in the South to Nova Scotia in the North.\nAnswer:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.4%", + "z-score": "0.452", + "p value": "0.325", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.3365, 0.2791, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.4082,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.4944, 0.4481, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.6367, 0.5927, 0.5489, 0.6737, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "63.5%", + "z-score": "12.5", + "p value": "5.88e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.1241, 5.3134, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.3249, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 9.9278, 9.8430, 9.7590, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.6016, 9.7109, 9.8197, 9.9278,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.3020, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.7671, 10.6904,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.5261, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.4638])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: In what year did Harvard Stadium become the first ever concrete reinforced stadium in the country?\nContext: While Harvard's football team is no longer one of the country's best as it often was a century ago during football's early days (it won the Rose Bowl in 1920), both it and Yale have influenced the way the game is played.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "119", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.265", + "p value": "0.396", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.2646])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "69.2%", + "z-score": "14.4", + "p value": "4.57e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 6.6469, 6.8620, 7.0711,\n 7.2746, 7.4730, 7.6667, 7.8558, 8.0408, 8.2219, 7.9845, 8.1650,\n 8.3418, 8.1192, 7.9048, 8.0829, 8.2577, 8.4293, 8.5979, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.7039, 8.8648, 8.6828, 8.8426, 9.0000,\n 9.1551, 9.3081, 9.4589, 9.2874, 9.4373, 9.5852, 9.7312, 9.8754,\n 10.0178, 9.8553, 9.6962, 9.8387, 9.9795, 10.1187, 10.2562, 10.3923,\n 10.5269, 10.3758, 10.5096, 10.6421, 10.7732, 10.9030, 11.0315, 11.1588,\n 11.2848, 11.4097, 11.2667, 11.3910, 11.5142, 11.6362, 11.7572, 11.8771,\n 11.9961, 11.8589, 11.9774, 12.0949, 12.2114, 12.0779, 12.1940, 12.3091,\n 12.4234, 12.5368, 12.6493, 12.7609, 12.6322, 12.7435, 12.8540, 12.9636,\n 13.0725, 13.1806, 13.2879, 13.1636, 13.2706, 13.3769, 13.4825, 13.5873,\n 13.4661, 13.5707, 13.4513, 13.3333, 13.2167, 13.3217, 13.2067, 13.0931,\n 13.1979, 13.0859, 12.9750, 13.0798, 12.9704, 12.8622, 12.9668, 12.8599,\n 12.9641, 12.8586, 12.7542, 12.8582, 12.7550, 12.8586, 12.9616, 12.8598,\n 12.9624, 12.8618, 12.7622, 12.6635, 12.7660, 12.8679, 12.9692, 12.8719,\n 12.7755, 12.8766, 12.9771, 13.0771, 13.1765, 13.2753, 13.1806, 13.0866,\n 13.1852, 13.2834, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.5827, 13.4920, 13.5876, 13.6826, 13.7772, 13.6876, 13.7818, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.7986, 13.8914, 13.9838, 14.0758,\n 13.9896, 13.9042, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.2744,\n 14.1906, 14.1074, 14.1974, 14.1149, 14.0329, 13.9515, 13.8707, 13.7904,\n 13.8804, 13.9700, 14.0593, 14.1482, 14.2367, 14.1573, 14.0784, 14.1667,\n 14.2546, 14.3422, 14.2640, 14.3513, 14.4382, 14.3607])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: How much time was left in the game when Denver took the score to 24-10?\nContext: With 4:51 left in regulation, Carolina got the ball on their own 24-yard line with a chance to mount a game-winning drive, and soon faced 3rd-and-9.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -0.9949, -1.0580, -0.8402, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.2443, -1.2910, -1.1316, -0.9734, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.0235, -1.0666, -1.1094, -0.9676, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.0106, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.2514, 10.3630, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.7349, 10.8426, 10.7559, 10.8631, 10.7772,\n 10.6920, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.4009, 11.3196,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.7248, 12.6494, 12.5745, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What kind of needlework was used in the creation of the Bayeux Tapestry?\nContext: It was commissioned by Odo, the Bishop of Bayeux and first Earl of Kent, employing natives from Kent who were learned in the Nordic traditions imported in the previous half century by the Danish Vikings.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "27.7%", + "z-score": "0.57", + "p value": "0.284", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.1111, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.2376, 9.3641, 9.2609, 9.3865, 9.2847,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.2348, 10.3496, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.5632, 10.4738, 10.3853, 10.4956,\n 10.6052, 10.7141, 10.6265, 10.5397, 10.4537, 10.3683, 10.2837, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.2509, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 10.8749, 10.7959, 10.8984, 11.0004, 10.9220, 11.0235, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.0937, 11.0177, 10.9422, 11.0418, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 10.9431, 10.8702, 10.7978, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.7222, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: Why was NBC unable to broadcast the coronation of Queen Elizabeth II?\nContext: The first attempts to internationalize the ABC television network date back to the 1950s, after Leonard Goldenson, following the United Paramount Theatres model, tried to use on ABC the same strategies he had made in expanding UPT's theater operation to the international market.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.1952, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.3004, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.2143, 9.3326, 9.4501, 9.3611, 9.2729, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.9524, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.2630, 11.3608, 11.4581, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.7687, 11.6949, 11.6217, 11.5489, 11.6441, 11.5718, 11.6667,\n 11.5948, 11.5235, 11.6179, 11.7120, 11.6411, 11.5706, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Does the given context contain the answer to the question? Answer 'yes' or 'no':\nQuestion: What was developed from Watt's measurements on a model steam engine?\nContext: The steam engine contributed much to the development of thermodynamic theory; however, the only applications of scientific theory that influenced the steam engine were the original concepts of harnessing the power of steam and atmospheric pressure and knowledge of properties of heat and steam.\nAnswer:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.8447, 0.9901,\n 1.1345, 1.0812, 1.0284, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.9867, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.6437, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 5.9944, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.3827, 7.2667, 7.1525, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.2609, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.4563, 9.3582, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 9.9373, 10.0504, 10.1627, 10.0748, 9.9878, 10.0995,\n 10.0133, 9.9278, 10.0389, 10.1494, 10.0647, 10.1745, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.3583, 10.2766, 10.1955, 10.3024,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.6439, 10.7480, 10.8515, 10.9545,\n 11.0569, 10.9773, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.4244, 11.5234, 11.6220, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 11.9176,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.3718, 12.2992, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.57, + "accuracy_with_watermark": 0.49, + "f1_without_watermark": 0.5501621508525996, + "f1_with_watermark": 0.4498975299320462 + } + } + }, + "rte": { + "train": { + "results": [ + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: No Weapons of Mass Destruction Found in Iraq Yet.\nHypothesis: Weapons of Mass Destruction Found in Iraq.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 1.0719, 1.2439, 1.4142,\n 1.5828, 1.7496, 1.9149, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.9979, 0.9497, 0.9017, 1.0338, 1.1651, 1.2956, 1.4254,\n 1.5544, 1.6827, 1.8102, 1.9370, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.5621, 1.5159, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.4858, 9.3721, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.1754, 10.2923, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.5645, 11.4714, 11.5779, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.7108, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.9594, 12.0611, 11.9737, 12.0749, 12.1756,\n 12.0891, 12.1893, 12.1036, 12.2034, 12.3027, 12.2178, 12.3167, 12.4150,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.6367, 12.7329, 12.6504,\n 12.7461, 12.8414, 12.7597, 12.8546, 12.9491, 12.8680, 12.7876, 12.7077,\n 12.6283, 12.7226, 12.6439, 12.5657, 12.6597, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.5265, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI.\nHypothesis: Pope Benedict XVI is the new leader of the Roman Catholic Church.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "11.6%", + "z-score": "-4.38", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.3462, -2.3982, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.7272, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.3041, -3.3420, -3.3797, -3.4171, -3.4543, -3.4913, -3.5280, -3.5645,\n -3.6008, -3.6369, -3.4769, -3.5132, -3.5494, -3.5853, -3.6210, -3.6566,\n -3.6919, -3.7270, -3.7619, -3.7966, -3.6420, -3.6770, -3.7117, -3.7463,\n -3.7808, -3.8150, -3.8490, -3.8829, -3.9166, -3.9501, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.9343, -3.9673, -4.0003, -4.0330, -4.0656, -4.0980,\n -3.9526, -3.9853, -4.0177, -4.0501, -4.0822, -4.1143, -4.1461, -4.1779,\n -4.2094, -4.2409, -4.0996, -4.1312, -4.1627, -4.1940, -4.2252, -4.2563,\n -4.2872, -4.3180, -4.3487, -4.3792, -4.2416, -4.2723, -4.3029, -4.3333,\n -4.3637, -4.3938, -4.4239, -4.4538, -4.4837, -4.5134, -4.3792])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.1428, 8.0167, 7.8928,\n 8.0413, 7.9196, 8.0667, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.2609, 9.1590, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.0139, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.0504, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.1243, 10.0389, 9.9542, 10.0647, 9.9807, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 10.9220, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.0702, 11.1702, 11.0937, 11.0177, 10.9422, 11.0418, 10.9669,\n 10.8925, 10.9917, 10.9178, 10.8444, 10.9431, 11.0414, 11.1392, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Herceptin was already approved to treat the sickest breast cancer patients, and the company said, Monday, it will discuss with federal regulators the possibility of prescribing the drug for more breast cancer patients.\nHypothesis: Herceptin can be used to treat breast cancer.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "34", + "# Tokens in Greenlist": "9", + "Fraction of T in Greenlist": "26.5%", + "z-score": "0.198", + "p value": "0.422", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774, 0.9802, 0.8165,\n 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428, 0.8083, 0.6794, 0.5556,\n 0.4364, 0.3216, 0.2108, 0.1037, 0.4082, 0.3015, 0.1980])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "76", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "39.5%", + "z-score": "2.91", + "p value": "0.00178", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094, 2.6605, 2.4495,\n 2.2517, 2.5820, 2.3938, 2.2156, 2.5281, 2.3570, 2.1939, 2.0381, 1.8889,\n 1.7457, 1.6082, 1.4757, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 2.1831, 2.4351, 2.3113, 2.1909, 2.4345, 2.6726, 2.9055, 2.7852, 3.0123,\n 3.2348, 3.4528, 3.3333, 3.5466, 3.7559, 3.6380, 3.5228, 3.7273, 3.6141,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.3824, 3.2796, 3.4743, 3.3729, 3.2733,\n 3.4641, 3.3657, 3.2691, 3.1741, 3.0806, 2.9887, 2.8983, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 2.9140])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Judie Vivian, chief executive at ProMedica, a medical service company that helps sustain the 2-year-old Vietnam Heart Institute in Ho Chi Minh City (formerly Saigon), said that so far about 1,500 children have received treatment.\nHypothesis: The previous name of Ho Chi Minh City was Saigon.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.2179, -1.2566, -1.1279, -1.1667,\n -1.2052, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.7139, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.6747, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.3333, 9.2351, 9.3582, 9.2611, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.1391, 10.2514, 10.3630, 10.2743, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.4411, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.7200, 11.6425, 11.7401, 11.8373,\n 11.9340, 12.0302, 12.1260, 12.0493, 12.1447, 12.2397, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.1270, 12.0529, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A man is due in court later charged with the murder 26 years ago of a teenager whose case was the first to be featured on BBC One's Crimewatch. Colette Aram, 16, was walking to her boyfriend's house in Keyworth, Nottinghamshire, on 30 October 1983 when she disappeared. Her body was later found in a field close to her home. Paul Stewart Hutchinson, 50, has been charged with murder and is due before Nottingham magistrates later.\nHypothesis: Paul Stewart Hutchinson is accused of having stabbed a girl.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "21.2%", + "z-score": "-1.1", + "p value": "0.863", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.0050, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "85", + "Fraction of T in Greenlist": "42.7%", + "z-score": "5.77", + "p value": "3.95e-09", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, 0.0000, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.5680, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.9115, 1.0593, 1.2060, 1.3517, 1.4963, 1.6398, 1.5842,\n 1.7264, 1.6710, 1.6160, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.8699, 2.0068, 2.1429, 2.2780, 2.4122, 2.3570, 2.4902, 2.6224,\n 2.7539, 2.8845, 3.0143, 3.1433, 3.2715, 3.2152, 3.3424, 3.2863,\n 3.4126, 3.3567, 3.4821, 3.6067, 3.7306, 3.6745, 3.7975, 3.9198,\n 4.0415, 4.1624, 4.2827, 4.2262, 4.3456, 4.2893, 4.4080, 4.5260,\n 4.6434, 4.7602, 4.8763, 4.8197, 4.9351, 4.8787, 4.8227, 4.9373,\n 5.0513, 4.9953, 4.9397, 4.8845, 4.9975, 4.9425, 4.8877, 5.0000,\n 5.1117, 5.2229, 5.3335, 5.4436, 5.5532, 5.6622, 5.7707])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Britain said, Friday, that it has barred cleric, Omar Bakri, from returning to the country from Lebanon, where he was released by police after being detained for 24 hours.\nHypothesis: Bakri was briefly detained, but was released.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "92", + "# Tokens in Greenlist": "25", + "Fraction of T in Greenlist": "27.2%", + "z-score": "0.482", + "p value": "0.315", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.4815])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.6098, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.4091, 4.2848, 4.1633, 4.0446, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.6571, 4.8394, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.0474, 5.9491, 5.8522, 6.0041, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.7890, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.3333, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.8808, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.6758, 7.7981, 7.9196, 8.0403, 8.1602, 8.0798, 8.0002, 8.1192,\n 8.0402, 8.1585, 8.2760, 8.3927, 8.5088, 8.4303, 8.3525, 8.2754,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.6556, 8.7681, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.8752, 8.9851, 9.0944, 9.0200, 8.9461,\n 8.8728, 8.8000, 8.9086, 8.8364, 8.9444, 8.8726, 8.9800, 9.0869,\n 9.0155, 9.1218, 9.2276, 9.3328, 9.2619, 9.3665, 9.2961, 9.2261,\n 9.1566, 9.2607, 9.1916, 9.2952, 9.3982, 9.5007, 9.6028, 9.5341,\n 9.4658, 9.5673, 9.4995, 9.6005, 9.7011, 9.8012, 9.7337, 9.6667,\n 9.6000, 9.6996, 9.6334, 9.7325, 9.8311, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Nearly 4 million children who have at least one parent who entered the U.S. illegally were born in the United States and are U.S. citizens as a result, according to the study conducted by the Pew Hispanic Center. That's about three quarters of the estimated 5.5 million children of illegal immigrants inside the United States, according to the study. About 1.8 million children of undocumented immigrants live in poverty, the study found.\nHypothesis: Three quarters of U.S. illegal immigrants have children.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.2100, -0.0838, -0.1253, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "140", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "39.3%", + "z-score": "3.9", + "p value": "4.74e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 0.9366, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.8791, 2.0870, 1.9959, 2.1997, 2.1094, 2.0207,\n 1.9335, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.9127, 1.8378, 1.7638, 1.9413, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.3126, 2.2393, 2.1669, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.5207, 2.4495, 2.3791, 2.5403, 2.7001, 2.6296, 2.5600, 2.4910,\n 2.4228, 2.3552, 2.5117, 2.6667, 2.5991, 2.7524, 2.6852, 2.8368,\n 2.7699, 2.9200, 3.0688, 3.2163, 3.1492, 3.2953, 3.4402, 3.3731,\n 3.3066, 3.4499, 3.3838, 3.5256, 3.6664, 3.6004, 3.7399, 3.8784,\n 3.8125, 3.9497, 3.8841, 4.0202, 4.1552, 4.0898, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 3.9036])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Like the United States, U.N. officials are also dismayed that Aristide killed a conference called by Prime Minister Robert Malval in Port-au-Prince in hopes of bringing all the feuding parties together.\nHypothesis: Aristide had Prime Minister Robert Malval murdered in Port-au-Prince.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.4495,\n -2.5126, -2.1783, -2.2446, -1.9245, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.6713, -2.7247, -2.7775,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.4930, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.7932, -2.8402,\n -2.8868, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -3.1730, -2.9775, -3.0210, -3.0641,\n -3.1069, -3.1493, -3.1914, -3.2332, -3.2746, -3.3156, -3.3564, -3.3968,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.1928, -3.0140, -3.0551,\n -3.0958, -3.1363, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -2.9917, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.1305, -2.9704,\n -3.0089, -3.0471, -3.0851, -2.9277, -2.9659, -3.0039, -3.0417, -3.0792,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -2.9971,\n -3.0339, -3.0706, -3.1071, -3.1433, -3.1794, -3.2152, -3.0677, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -2.8868, -2.9225, -2.9581, -2.8174, -2.8532, -2.8887, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.7541, -2.7894, -2.6534, -2.6888, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.7701, -2.8043, -2.8383, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.5%", + "z-score": "8.94", + "p value": "1.87e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.8765, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 3.7033,\n 3.9001, 3.7905, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552, 4.0415,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.9056, 4.0825,\n 4.2571, 4.4296, 4.6000, 4.7683, 4.9346, 5.0990, 5.2615, 5.1640,\n 5.0679, 4.9731, 4.8797, 4.7875, 4.9472, 4.8561, 5.0138, 4.9237,\n 5.0795, 5.2338, 5.3865, 5.2970, 5.2086, 5.1212, 5.2719, 5.4212,\n 5.5690, 5.7155, 5.8606, 6.0044, 6.1470, 6.2883, 6.2008, 6.1143,\n 6.0288, 5.9442, 5.8605, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.2459, 6.1644, 6.0837, 6.0038, 6.1382, 6.2716, 6.4040, 6.3246,\n 6.4558, 6.3770, 6.5072, 6.4291, 6.3517, 6.4807, 6.6089, 6.7361,\n 6.8624, 6.7854, 6.9107, 6.8343, 6.7585, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.5857, 6.7089, 6.8313, 6.7584, 6.8799, 6.8075, 6.7358,\n 6.6645, 6.7850, 6.9048, 6.8339, 6.9529, 6.8825, 6.8127, 6.7434,\n 6.8614, 6.7925, 6.9097, 6.8413, 6.9577, 6.8897, 7.0054, 7.1204,\n 7.0527, 7.1670, 7.2807, 7.3937, 7.3263, 7.2594, 7.3717, 7.4833,\n 7.5944, 7.7048, 7.8147, 7.9241, 7.8572, 7.9659, 7.8995, 8.0076,\n 7.9415, 8.0490, 8.1560, 8.0902, 8.1966, 8.3024, 8.4078, 8.3423,\n 8.2773, 8.3820, 8.4862, 8.5899, 8.6932, 8.7959, 8.7311, 8.6667,\n 8.6026, 8.7048, 8.8065, 8.9077, 8.8439, 8.9446])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: WASHINGTON -- A newly declassified narrative of the Bush administration's advice to the CIA on harsh interrogations shows that the small group of Justice Department lawyers who wrote memos authorizing controversial interrogation techniques were operating not on their own but with direction from top administration officials, including then-Vice President Dick Cheney and national security adviser Condoleezza Rice. At the same time, the narrative suggests that then-Defense Secretary Donald H. Rumsfeld and then-Secretary of State Colin Powell were largely left out of the decision-making process.\nHypothesis: Dick Cheney was the Vice President of Bush.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, 0.0000, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.6885, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.5927, -0.4644, -0.3369, -0.3780, -0.4189, -0.2924, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "162", + "Fraction of T in Greenlist": "81.4%", + "z-score": "18.4", + "p value": "1.02e-75", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.4678, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 7.0711, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.2221, 12.3299, 12.4370, 12.5434, 12.6491,\n 12.5442, 12.6495, 12.7542, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.3690, 13.4694, 13.5693, 13.6685, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.9594, 14.0561, 14.1524, 14.2481, 14.3434, 14.4381, 14.5324, 14.6262,\n 14.5277, 14.6212, 14.7143, 14.8069, 14.8990, 14.9907, 15.0819, 15.1727,\n 15.2631, 15.3530, 15.4425, 15.5316, 15.4360, 15.5249, 15.6133, 15.7014,\n 15.7890, 15.8763, 15.9632, 16.0497, 16.1358, 16.2216, 16.3070, 16.3920,\n 16.2990, 16.3839, 16.4684, 16.5525, 16.6363, 16.7197, 16.8028, 16.8855,\n 16.9680, 17.0500, 17.1318, 17.2133, 17.1227, 17.2040, 17.2850, 17.3656,\n 17.4460, 17.5260, 17.6058, 17.6852, 17.7643, 17.8432, 17.9217, 18.0000,\n 17.9117, 17.9899, 18.0677, 18.1453, 18.2226, 18.2996, 18.3763])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Only a week after it had no comment on upping the storage capacity of its Hotmail e-mail service, Microsoft early Thursday announced it was boosting the allowance to 250MB to follow similar moves by rivals such as Google, Yahoo, and Lycos.\nHypothesis: Microsoft's Hotmail has raised its storage capacity to 250MB.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "131", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "36.6%", + "z-score": "3.08", + "p value": "0.00105", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.6914, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.9333, 1.8543, 1.7765, 1.9612, 2.1436, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.9413, 2.1167, 2.0426, 1.9695,\n 1.8972, 2.0692, 1.9973, 2.1669, 2.0954, 2.2629, 2.4286, 2.5927,\n 2.7552, 2.9161, 3.0754, 3.0022, 3.1597, 3.0870, 3.0151, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.8889, 2.8203, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.7037, 2.6381, 2.5731, 2.5087, 2.4449, 2.3817, 2.5298,\n 2.6768, 2.8226, 2.7591, 2.6961, 2.6336, 2.5717, 2.7153, 2.8577,\n 2.7958, 2.9369, 3.0770])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 6.7893,\n 6.9589, 7.1261, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.8473,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.0902, 11.2069, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 11.7881, 11.6829, 11.5788, 11.6894, 11.7992, 11.9083, 11.8058,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.9288, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.3655, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.9771, 12.8819, 12.9820, 12.8877, 12.9874, 12.8942,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.0071, 13.1050, 13.2025, 13.2995,\n 13.2093, 13.3059, 13.4021, 13.3128, 13.4086, 13.3201, 13.4155, 13.5105,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.7986, 13.7122, 13.6264, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.7706, 13.8613, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.3166, 14.2367, 14.1573, 14.2455, 14.3333,\n 14.2546, 14.3422, 14.2640, 14.3513, 14.4382, 14.3607, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Lina Joy, 42, was born Azlina Jailani to Malay parents, and was raised as a Muslim. Malaysia's constitution guarantees freedom of religion, but by law, all ethnic Malays are Muslim. Joy converted to Christianity at age 26, and after some bureaucratic difficulties had her named legally changed in 1999. However, on her MyKad national ID, the National Registration Department retained her stated religion as Islam. In order to have her religion changed, the National Registration Department said Joy would have to obtain a certificate of apostasy from the Muslim Sharia Court.\nHypothesis: Lina Joy's parents are from Malaysia.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.4907, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.5542, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.5511, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.0461, -0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.1803, 0.1348, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.4828, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.5548, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998, 3.1177, 3.3968, 3.2222,\n 3.4915, 3.7524, 3.5839, 3.4219, 3.2660, 3.1156, 2.9704, 3.2205, 3.0792,\n 3.3221, 3.1844, 3.4207, 3.2863, 3.5165, 3.7417, 3.9620, 4.1779, 4.0451,\n 4.2563, 4.1265, 4.0000, 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.5569,\n 4.4374, 4.6291, 4.8177, 5.0034, 4.8857, 5.0684, 5.2485, 5.1326, 5.0190,\n 4.9075, 4.7980, 4.9747, 5.1490, 5.3211, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 6.1450, 6.3008, 6.4550, 6.6075,\n 6.5033, 6.6541, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469, 6.5483, 6.4510,\n 6.5970, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354, 6.9759, 6.8819, 6.7890,\n 6.9282, 6.8364, 6.7456, 6.6559, 6.7937, 6.9303, 6.8414, 6.9768, 7.1111,\n 7.2443, 7.1563, 7.2884, 7.4194, 7.5494, 7.4622, 7.3758, 7.5048, 7.4193,\n 7.3346, 7.4625, 7.5895, 7.7155, 7.6315, 7.5484, 7.4661, 7.5910, 7.7152,\n 7.6335, 7.5526, 7.6758, 7.5955, 7.7178, 7.6383, 7.7597, 7.6808, 7.8014,\n 7.7232, 7.6456, 7.7653, 7.6883, 7.8072, 7.9253, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.4664, 8.3910, 8.3162, 8.4293, 8.3550,\n 8.2813, 8.3937, 8.5054, 8.6165, 8.5433, 8.6537, 8.7636, 8.6908, 8.8000,\n 8.9086, 9.0167, 9.1242, 9.2311, 9.1587, 9.0869, 9.1932, 9.1218, 9.2276,\n 9.1567, 9.0863, 9.0164, 8.9469, 9.0520, 8.9830, 9.0876, 9.0190, 9.1230,\n 9.0549, 8.9872, 9.0906, 9.0233, 8.9565, 9.0593, 9.1617, 9.0952, 9.1971,\n 9.2986, 9.3995, 9.5000, 9.6000, 9.5338, 9.4680, 9.5675, 9.6666, 9.6011,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: November 9, 1989 , the day the Berlin Wall fell and the world changed forever . Not even the most astute saw it coming . As Hungary's foreign minister in the late summer of 1989 , Gyula Horn gave the order to let visiting East Germans use his country to do a 400-mile end run around the Berlin Wall , a move now seen as the beginning of the end for hard-line communism in Europe .\nHypothesis: The Berlin Wall was torn down in 1989.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, 0.1013, 0.2522, 0.2010, 0.1502, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.0461, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.2955, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.4425, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.9813, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.7444, 10.6534, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.6930, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Valero Energy Corp., on Monday, said it found \"extensive\" additional damage at its 250,000-barrel-per-day Port Arthur refinery.\nHypothesis: Valero Energy Corp. produces 250,000 barrels per day.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 2.0207,\n 1.9335, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.1193, 2.3028, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.0158, 2.1918, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 2.0785, 2.2405, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.3552, 2.2884, 2.4444, 2.3779, 2.3120, 2.2468, 2.1822,\n 2.3354, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.9863, 1.9261, 1.8665, 1.8074, 1.9545, 1.8956, 2.0412,\n 1.9825, 1.9242, 2.0682, 2.2111, 2.3529, 2.4938, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.4797, 2.4225, 2.3657, 2.5019,\n 2.4453, 2.5802, 2.5238, 2.6576, 2.6014, 2.5456, 2.4902, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.6519, 2.7783, 2.9040, 3.0290,\n 2.9756, 3.0997, 3.0464, 3.1696, 3.1166, 3.0638, 3.1860, 3.1334,\n 3.0811, 3.0292, 3.1502, 3.0984, 3.0469, 3.1669, 3.2863, 3.4050,\n 3.5232, 3.4713, 3.4198, 3.3686, 3.3177, 3.2671, 3.2167, 3.1667,\n 3.1169, 3.0674, 3.1836, 3.1342, 3.2496, 3.2004, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 2.0370, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.5396, 1.4237, 1.3112, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.9795, 1.8728, 2.1054, 2.3333,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.6662, 3.5642, 3.7528,\n 3.9386, 4.1219, 4.3026, 4.4809, 4.6568, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.8833, 9.0057, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.6537, 10.7594, 10.6771,\n 10.7822, 10.7006, 10.8051, 10.9091, 11.0125, 11.1154, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.2816, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.8928, 12.8169, 12.7416, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Oil prices fall back as Yukos oil threat lifted\nHypothesis: Oil prices rise.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.1884, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.1880, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.2966, -2.3351, -2.1909,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.3938, -2.2618, -2.2977, -2.3333,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.6904, 4.8669, 5.0410, 5.2129, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 8.8833, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.2592, 10.1745, 10.2837, 10.1999,\n 10.3085, 10.4164, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.6771,\n 10.5955, 10.5145, 10.4341, 10.5393, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 10.8200, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.1473, 11.0702, 11.1702, 11.2698, 11.3688, 11.4674, 11.5655, 11.6632,\n 11.5868, 11.6840, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.7585, 12.8499, 12.7756, 12.8667, 12.7928, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Brian Brohm, the Louisville quarterback, threw for 368 yards and five touchdowns as the Cardinals beat visiting Oregon State 63-27.\nHypothesis: The quarterback threw for 413 yards and three touchdowns, and then ran to the end zone two more times.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.2722, 0.4070, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.9646, 6.8419, 7.0014, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 8.8007, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.5001, 12.6012, 12.7017,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.4920, 13.5876, 13.6826, 13.7772, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.1725, 14.2633, 14.3537, 14.2686, 14.3587, 14.4484,\n 14.5378, 14.6267, 14.5426, 14.6313, 14.7195, 14.6362, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.9037, 14.9903, 15.0766, 15.1625, 15.2481, 15.1667,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Greg Page, a former heavyweight boxing champion who suffered a severe brain injury in a 2001 fight, has died at 50. His wife Patricia said the one-time WBA champion had died at his home in Kentucky, USA, of complications related to injuries he suffered in the fight. Page was in a coma for a week after the 9 March 2001 fight against Dale Crowe which was stopped in the 10th round. Patricia Page said he was \"in a better place now\" after announcing on Monday he had died overnight in his sleep.\nHypothesis: Greg Page was a WBA champion.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.4755, 1.4027, 1.3308, 1.5119, 1.4403, 1.6187, 1.7951, 1.9695,\n 1.8972, 1.8257, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.7496, 1.9149, 2.0785, 2.0107, 2.1723, 2.1049, 2.0381,\n 1.9720, 1.9066, 1.8419, 2.0000, 2.1567, 2.3120, 2.2468, 2.1822,\n 2.1182, 2.0548, 2.2074, 2.3586, 2.5087, 2.4449, 2.5934, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.4887, 2.6336, 2.7775, 2.7153, 2.8577,\n 2.7958, 2.7344, 2.6735, 2.6131, 2.7534, 2.6933, 2.8324, 2.9704,\n 3.1076, 3.0471, 2.9872, 2.9277, 2.8687, 2.8101, 2.9451, 2.8868,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.6014, 2.7341, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.3500, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.2680, 2.3967, 2.5247, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.2650, 2.2140, 2.3400, 2.2892, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.0170, 1.9686, 1.9206, 2.0430,\n 2.1648, 2.1167, 2.0688, 2.0212, 1.9738, 1.9267, 1.8799, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 9.2055, 9.3386, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.0179, 8.9086, 9.0401, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.6348, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.1379, 10.0380,\n 10.1564, 10.2740, 10.1754, 10.2923, 10.4083, 10.5236, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.5769, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.3642, 11.4714, 11.3791, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.8151, 11.7249, 11.6356, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.2503, 12.3508, 12.2628, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.9840, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.6896, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.0784, 14.1667,\n 14.2546, 14.1764, 14.0986, 14.1863, 14.1091, 14.0324, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Sierra is likely to remain in jail at the Hillsborough County jail in her native Tampa until her next hearing on December 20, where she is being held without bail, which would prevent her attending the Washington event on Friday even if she still had permission to perform. Sierra has been in jail since the start of the month after an altercation with police officers outside a Tampa nightclub, which she had been ejected from. She is charged with disorderly intoxication and resisting arrest.\nHypothesis: Sierra once reached the finals of \"American Idol\".\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "179", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "27.4%", + "z-score": "0.734", + "p value": "0.232", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.9238, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.9739, 1.1094, 1.0598, 1.0105, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.8909,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.7336])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.9293, 7.1393, 7.3435, 7.5425,\n 7.7365, 7.9259, 8.1111, 7.8558, 7.6120, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.2952, 8.4678, 8.6373, 8.8039, 8.9677, 9.1287,\n 9.2872, 9.0869, 8.8926, 8.7039, 8.5206, 8.6828, 8.5057, 8.3333,\n 8.1654, 8.0017, 8.1654, 8.0064, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.3709,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.2339, 11.1291,\n 11.0254, 10.9229, 11.0368, 11.1500, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 12.0357, 11.9370, 11.8392,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.6939, 12.6012, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.4086, 13.5039, 13.4155, 13.5105,\n 13.6050, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.1781, 14.2686, 14.3587, 14.2744,\n 14.1906, 14.2805, 14.3700, 14.2870, 14.2046, 14.1227, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.6599, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Since 1987, however, Brazil has taken steps to dramatically reduce the destruction, including stepped-up enforcement and the elimination of tax incentives that led to large-scale land clearing.\nHypothesis: In the early 1990s Brazil began to take action to save the rainforest.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.2649,\n 1.4171, 1.3590, 1.3014, 1.4517, 1.6008, 1.5430, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.7384, 1.8732,\n 1.8204, 1.9540, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.3131, 1.4402, 1.3926,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 5.0389, 4.8488, 4.6663, 4.4907, 4.7237, 4.9507, 4.7819, 5.0037,\n 4.8407, 4.6829, 4.5301, 4.7469, 4.5985, 4.8107, 4.6664, 4.8742, 5.0779,\n 4.9373, 4.8003, 4.6667, 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.5549, 5.4295, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919,\n 6.0622, 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.4065, 6.3008, 6.1968, 6.3509,\n 6.5033, 6.4006, 6.5514, 6.4501, 6.3502, 6.2517, 6.4008, 6.3035, 6.4510,\n 6.3549, 6.5008, 6.4059, 6.5504, 6.4566, 6.5997, 6.7414, 6.6486, 6.7890,\n 6.9282, 6.8364, 6.9743, 7.1110, 7.0201, 7.1556, 7.2900, 7.4233, 7.3333,\n 7.2443, 7.3765, 7.2884, 7.2012, 7.1149, 7.2459, 7.3758, 7.2904, 7.4193,\n 7.3346, 7.2508, 7.1678, 7.2956, 7.2134, 7.3402, 7.2587, 7.3845, 7.5094,\n 7.4286, 7.3485, 7.4724, 7.3930, 7.3143, 7.2363, 7.3592, 7.4813, 7.4039,\n 7.5251, 7.4483, 7.3721, 7.2966, 7.4168, 7.3419, 7.4613, 7.3869, 7.5056,\n 7.6235, 7.5495, 7.4762, 7.5933, 7.5204, 7.4482, 7.3765, 7.4927, 7.6082,\n 7.5369, 7.6517, 7.5809, 7.5106, 7.4409, 7.5548, 7.4855, 7.5988, 7.5299,\n 7.6424, 7.7544, 7.6859, 7.6179, 7.7291, 7.6615, 7.5944, 7.5277, 7.6381,\n 7.7480, 7.6816, 7.7908, 7.7249, 7.6594, 7.5944, 7.7028, 7.6381, 7.7460,\n 7.6816, 7.7889, 7.8956, 7.8316, 7.7679, 7.8740, 7.8107, 7.7478, 7.6853,\n 7.7907, 7.8956, 7.8333, 7.9377, 7.8758, 7.8142, 7.7530, 7.8567, 7.7958,\n 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: FIFA has received 11 bids to host the 2018 and 2022 FIFA World Cup tournaments, an international football competition contested by the men's national teams. The countries vying to host the tournament are Australia, England, Indonesia, Japan, Mexico, Qatar, Russia, South Korea and United States, who have individual bids and the joint bids are from Belgium-Netherlands and Spain-Portugal. Select bids are for 2018 and 2022 tournaments and two bids are just for the 2022 tournament. Qatar and South Korea are vying just for the 2022 tournament. The two winning bids will be chosen on December 2010 by the 24-man executive committee of FIFA. Said FIFA president Sepp Blatter: \"We are very pleased about the fantastic level of interest in our flagship competition, with all initial bidders confirming their candidature.\"\nHypothesis: Sepp Blatter is the president of FIFA.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -0.7137, -0.4714, -0.5449, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368, 9.4281,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.4524, 10.5632, 10.4738, 10.5841, 10.4956,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.4829, 11.5841, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 12.0532, 12.1502, 12.2467, 12.3428, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.7847, 12.8771, 12.9691, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.0157, 12.9410, 13.0316, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: U.S. crude settled $1.32 lower at $42.83 a barrel.\nHypothesis: Crude the light American lowered to the closing 1.32 dollars, to 42.83 dollars the barrel.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 2.0948, 2.0135, 1.9333, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.7233,\n 1.6524, 1.8257, 1.9973, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 1.8475, 2.0107, 1.9437, 2.1049, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.3120, 2.4660, 2.4004,\n 2.3354, 2.2711, 2.4227, 2.3586, 2.2952, 2.4449, 2.3817, 2.5298,\n 2.4669, 2.4045, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.0682, 2.0101, 1.9524, 2.0948, 2.2361, 2.1783,\n 2.3183, 2.4574, 2.3995, 2.3422, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.3891, 2.3333, 2.4678, 2.4122, 2.5456, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.5886, 2.7186, 2.8478, 2.7930, 2.7386,\n 2.6846, 2.8124, 2.7585, 2.7050, 2.8316, 2.7783, 2.9040, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.5319, 2.6534, 2.6034, 2.7240,\n 2.8440, 2.7940, 2.7443, 2.8633, 2.8137, 2.7644, 2.7154, 2.6667,\n 2.7844, 2.7358, 2.8528, 2.8043, 2.9205, 3.0363, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "59.4%", + "z-score": "11.1", + "p value": "3.68e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.6353, 1.8728, 2.1054, 2.3333,\n 2.2269, 2.4495, 2.6679, 2.5621, 2.7757, 2.6713, 2.8804, 3.0861,\n 2.9824, 2.8808, 3.0817, 3.2796, 3.1787, 3.0796, 3.2733, 3.1754,\n 3.0793, 3.2691, 3.4562, 3.3607, 3.5447, 3.7264, 3.6315, 3.8103,\n 3.9869, 3.8927, 4.0667, 3.9736, 4.1451, 4.3146, 4.2222, 4.1312,\n 4.2981, 4.4630, 4.3727, 4.2836, 4.4462, 4.3580, 4.2710, 4.4313,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.7296, 4.8830, 4.7980, 4.9497,\n 4.8655, 5.0156, 5.1643, 5.3116, 5.4576, 5.6023, 5.7457, 5.8878,\n 6.0288, 5.9442, 5.8605, 6.0000, 6.1383, 6.2755, 6.1924, 6.1101,\n 6.2459, 6.1644, 6.2991, 6.4327, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.7155, 6.6365, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.2691, 7.3930, 7.5161, 7.6383, 7.7597, 7.6808, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.2760, 8.3927, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.2450, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.7574, 9.6850, 9.7886, 9.8918, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.3999, 10.5001, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.5410, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: WINNENDEN, Germany \u2015A teenage gunman killed 15 people, most of them female, on Wednesday in a rampage that began at a school near Stuttgart in southern Germany and ended in a nearby town, where he then killed himself after the police wounded him. The attack left Germany, which tightened tough gun controls after a similar attack at a school seven years ago, struggling to understand the carnage that had again befallen it, a country with relatively little violent crime. In 2002, a gunman killed 16 people before killing himself at a school in Erfurt, in eastern Germany.\nHypothesis: In 2002 near Stuttgart a boy shot 16 people.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.2907, -0.1448, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.5410, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.9272, 0.8805, 0.8340, 0.7878, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.7255, 0.8513,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "152", + "Fraction of T in Greenlist": "77.6%", + "z-score": "17", + "p value": "4.82e-65", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 6.7254, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.8271, 9.9570, 10.0857, 9.9656, 10.0935,\n 9.9754, 10.1024, 10.2283, 10.3532, 10.2375, 10.1234, 10.2476, 10.3709,\n 10.4932, 10.6145, 10.7348, 10.8542, 10.7429, 10.8616, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.1136, 12.0077, 11.9029, 12.0118, 12.1200, 12.0167,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.9639, 13.0655, 13.1665, 13.2669, 13.3667, 13.4660,\n 13.5647, 13.6630, 13.5647, 13.6626, 13.5654, 13.6629, 13.5668, 13.4715,\n 13.5688, 13.6656, 13.7619, 13.8577, 13.9530, 14.0479, 14.1422, 14.2361,\n 14.1428, 14.2364, 14.3295, 14.4222, 14.5144, 14.6062, 14.6976, 14.7885,\n 14.8790, 14.9691, 14.8779, 14.9677, 14.8773, 14.9669, 15.0560, 15.1448,\n 15.2332, 15.3211, 15.4087, 15.4959, 15.5828, 15.6692, 15.7553, 15.8411,\n 15.7529, 15.8384, 15.9235, 16.0083, 16.0928, 16.0057, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.3377, 16.4205, 16.5028, 16.5849, 16.6667,\n 16.7481, 16.8292, 16.9101, 16.9906])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Many hopes are riding on the sale of Talisman's holdings in Palm Beach and Hendry counties, which Vice President Al Gore announced with much fanfare last year at the 50th anniversary of Everglades National Park.\nHypothesis: Everglades National Park is located in Florida.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 1.8970, 2.1229, 2.0211, 1.9215, 2.1412, 2.0428, 1.9462, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.0870, 2.2916, 2.1997, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.3276, 2.2404, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.5019, 2.6778, 2.5983, 2.7717, 2.6928, 2.6148, 2.7854, 2.7080,\n 2.6316, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.5403, 2.7001, 2.6296, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.3779, 2.3120, 2.4660, 2.6186,\n 2.5527, 2.4874, 2.4227, 2.3586, 2.2952, 2.4449, 2.5934, 2.7406,\n 2.8868, 2.8226, 2.9673, 3.1109, 3.0467, 2.9832, 3.1251, 3.0619,\n 2.9991, 2.9369, 2.8753, 3.0151, 2.9537, 3.0923, 3.0311, 2.9704,\n 2.9103, 2.8505, 2.9872, 3.1229, 3.2577, 3.3915, 3.3314, 3.4641,\n 3.5960, 3.5359, 3.4762, 3.6068, 3.5474, 3.4884, 3.4298, 3.3717,\n 3.5007, 3.4428, 3.3853, 3.3282, 3.2715, 3.2152, 3.1593, 3.2863,\n 3.4126, 3.5382, 3.6629, 3.6067, 3.7306, 3.8538, 3.7975, 3.7417,\n 3.8638, 3.8081, 3.7528, 3.6979, 3.6433, 3.7641, 3.7097, 3.8297,\n 3.7755, 3.7216, 3.6680, 3.6148, 3.7335, 3.8516, 3.9691, 4.0860,\n 4.0325, 4.1487, 4.2642, 4.2108, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.1143, 4.0622, 4.1758, 4.1239, 4.0723, 4.0210, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.2815, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.5067, 8.3984, 8.2916, 8.4270, 8.5612, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.2816, 8.1816, 8.3138, 8.4449, 8.3463, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.5732,\n 8.4891, 8.6083, 8.7267, 8.8443, 8.9612, 9.0773, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.4812, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.6814, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.5388, 10.6404, 10.5654, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.4724, 10.5725, 10.5001, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.7222, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Rabies virus infects the central nervous system, causing encephalopathy and ultimately death. Early symptoms of rabies in humans are nonspecific, consisting of fever, headache, and general malaise.\nHypothesis: Rabies is fatal in humans.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "26.3%", + "z-score": "0.402", + "p value": "0.344", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.2056, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.3825, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, 0.0490, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, 0.1382, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.4481, 0.4021])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 4.7819, 5.0037, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 6.7769, 6.6679, 6.5607, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 7.8074, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.2222, 8.3503, 8.4774, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 8.9178, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.9807, 9.8975, 9.8150,\n 9.7331, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.5654, 10.4909, 10.4170, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.1883, 11.1164, 11.2129, 11.3091, 11.2376, 11.3333,\n 11.2624, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: American tobacco companies were showing a profit most quarters due to export sales of cigarettes and diversification of products sold including food.\nHypothesis: PM often entered markets with both cigarettes and food.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.8%", + "z-score": "0.903", + "p value": "0.183", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 1.0498, 0.9864, 1.1547, 1.0915, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.2649,\n 1.2072, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.9497, 1.0820, 1.0338, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.6885, 0.8154, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.9461, 0.9027])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.0000,\n 7.1756, 7.0219, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.6141, 9.7376,\n 9.6348, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.1948, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.7157, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.0660, 12.1646, 12.2627, 12.1805, 12.0990, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.5179, 12.6130, 12.7077,\n 12.6283, 12.5495, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.0608, 12.9845, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The development of agriculture by early humans, roughly 10,000 years ago, was also harmful to many natural ecosystems as they were systematically destroyed and replaced with artificial versions.\nHypothesis: Humans existed 10,000 years ago.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.4444, -0.4977, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.7143, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.2730, -1.1380, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.4444, 5.6737, 5.8966, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.4878, 7.6613, 7.8320, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.1742, 8.0238,\n 7.8766, 7.7326, 7.8923, 8.0498, 7.9097, 7.7723, 7.9286, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.1287, 9.0179, 9.1493, 9.0401, 8.9324, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.6894, 10.8012, 10.9123, 10.8186,\n 10.9291, 10.8363, 10.7444, 10.8544, 10.7635, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.6827, 11.7851, 11.8870, 11.8010,\n 11.7157, 11.6311, 11.7326, 11.8336, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.1805, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.3100, 12.2298, 12.3263, 12.2467, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.2429, 13.1667,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.1219, 13.2118, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The two young leaders of the coup, Pibul Songgram and Pridi Phanomyang, both educated in Europe and influenced by Western ideas, came to dominate Thai politics in the ensuing years.\nHypothesis: Pibul was a young leader.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n -0.0493, 0.0983, 0.0490, 0.1952, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.3073, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.5000,\n 0.4571, 0.4145, 0.5375, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.1859, 6.0421, 5.9017, 5.7646, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.3249, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.1291, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.4407, 10.5475, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.9906, 10.9091, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.6412, 11.5613, 11.6606, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 12.0712, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 13.1520, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Lin Piao, after all, was the creator of Mao's \"Little Red Book\" of quotations.\nHypothesis: Lin Piao wrote the \"Little Red Book\".\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 0.8513, 1.0948, 1.0000,\n 1.2372, 1.4697, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.8791, 2.0870, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.9887, 2.1678, 2.0913, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 1.8475, 2.0107, 1.9437, 1.8773, 1.8116,\n 1.7467, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.1267, 0.0000, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.6977, 1.9215, 2.1412, 2.3570, 2.2576, 2.4689,\n 2.6765, 2.5775, 2.4804, 2.6833, 2.5873, 2.7863, 2.9823, 3.1754,\n 3.3657, 3.5533, 3.7383, 3.9208, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.2571, 4.4296, 4.6000, 4.5034, 4.6715, 4.5760, 4.4820, 4.3894,\n 4.5547, 4.4630, 4.6262, 4.7875, 4.6967, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.4772, 5.3865, 5.2970, 5.4480, 5.5976, 5.7458, 5.8926,\n 5.8035, 5.9488, 6.0927, 6.0044, 6.1470, 6.0596, 6.2008, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.0000, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.4632, 6.5970, 6.5144, 6.6471, 6.7788, 6.9094, 6.8274, 6.7462,\n 6.6658, 6.7952, 6.7155, 6.6365, 6.7648, 6.6865, 6.8138, 6.7361,\n 6.8624, 6.7854, 6.9107, 6.8343, 6.7585, 6.6833, 6.8076, 6.9310,\n 7.0537, 7.1755, 7.2966, 7.4168, 7.5364, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.6667, 7.5933, 7.7096, 7.6368, 7.5644, 7.4927,\n 7.6082, 7.7230, 7.8372, 7.7658, 7.6950, 7.6246, 7.5548, 7.6681,\n 7.5988, 7.7114, 7.8233, 7.9347, 8.0455, 8.1556, 8.2652, 8.1960,\n 8.1273, 8.2362, 8.1679, 8.1001, 8.2084, 8.1410, 8.2486, 8.3557,\n 8.4623, 8.3952, 8.5012, 8.4345, 8.3683, 8.4736, 8.5785, 8.6828,\n 8.7867, 8.8900, 8.9929, 8.9268, 8.8612, 8.7959, 8.8982, 8.8333,\n 8.7689, 8.8706, 8.8065, 8.9077, 8.8439, 8.9446, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: When patients interrupt a course of antibiotics, the surviving bacteria return with a vengeance, often having rapidly mutated to resist the therapy.\nHypothesis: Bacteria is winning the war against antibiotics.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.2831, -1.1025, -1.1547, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.6521, -1.6958, -1.5404, -1.5842,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.4194, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.1237, -1.1651, -1.2063, -1.0690,\n -1.1103, -1.1513, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.0777, -0.9509, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 4.2426, 4.0415, 4.3027, 4.1111,\n 4.3644, 4.6101, 4.4272, 4.2515, 4.4907, 4.7237, 4.5547, 4.3916, 4.2339,\n 4.0814, 3.9337, 3.7905, 4.0166, 3.8772, 3.7417, 3.6098, 3.4816, 3.3566,\n 3.2348, 3.4528, 3.3333, 3.2167, 3.1027, 3.3147, 3.2026, 3.0929, 3.2998,\n 3.5032, 3.3947, 3.5942, 3.7905, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461,\n 4.3301, 4.5115, 4.6904, 4.8669, 4.7610, 4.9348, 5.1065, 5.0019, 4.8990,\n 5.0680, 5.2350, 5.1333, 5.2981, 5.4610, 5.6220, 5.7812, 5.9386, 5.8377,\n 5.9932, 6.1471, 6.0474, 6.1996, 6.3502, 6.2517, 6.4008, 6.5483, 6.6944,\n 6.8391, 6.7416, 6.6454, 6.5504, 6.6935, 6.8354, 6.7414, 6.8819, 6.7890,\n 6.6973, 6.8364, 6.9743, 6.8834, 6.7937, 6.9303, 6.8414, 6.9768, 7.1111,\n 7.0231, 7.1563, 7.2884, 7.2012, 7.3322, 7.2459, 7.1605, 7.2904, 7.4193,\n 7.3346, 7.2508, 7.1678, 7.0857, 7.0043, 7.1319, 7.0513, 7.1779, 7.0980,\n 7.2236, 7.3485, 7.2691, 7.1904, 7.3143, 7.4373, 7.3592, 7.2818, 7.2051,\n 7.1291, 7.0537, 6.9789, 7.1007, 7.0265, 6.9529, 6.8799, 6.8075, 6.7358,\n 6.6645, 6.7850, 6.7143, 6.6441, 6.5745, 6.6939, 6.6248, 6.5561, 6.6747,\n 6.7925, 6.7242, 6.8413, 6.9577, 7.0735, 7.1885, 7.3030, 7.2348, 7.1670,\n 7.0998, 7.2134, 7.1465, 7.0801, 7.1929, 7.3051, 7.2391, 7.3506, 7.4615,\n 7.5719, 7.5061, 7.6158, 7.7249, 7.8335, 7.7679, 7.7028, 7.6381, 7.7460,\n 7.8533, 7.9601, 7.8956, 8.0018, 7.9377, 8.0433, 8.1485, 8.0847, 8.1892,\n 8.2933, 8.2298, 8.1667, 8.1039, 8.0416, 8.1449, 8.0829, 8.1858, 8.2882,\n 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Initially the Bundesbank opposed the introduction of the euro but was compelled to accept it in light of the political pressure of the capitalist politicians who supported its introduction.\nHypothesis: The introduction of the euro has been opposed.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.5222, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.6660, 0.8165,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.4659, 2.7406, 2.5924, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 3.7009, 3.9158, 4.1265, 4.0000,\n 4.2064, 4.0825, 3.9614, 4.1633, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.0937, 4.2844, 4.1740, 4.3614, 4.2528, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.3026, 4.4809, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.1332, 5.2915, 5.4482, 5.6032, 5.7566, 5.6622,\n 5.5691, 5.7207, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.2001, 7.3333, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.4622, 7.5912, 7.7192, 7.6328, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 10.1429, 10.2509, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.7242, 10.6439, 10.7480, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.6397, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.4356, 11.5329, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.7169, 11.8117, 11.7389, 11.8333,\n 11.9273, 12.0209, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Armed Forces Press Committee (COPREFA) admitted that the government troops sustained 11 casualties in these clashes, adding that they inflicted three casualties on the rebels.\nHypothesis: Three rebels were killed by government troops.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "22.0%", + "z-score": "-0.87", + "p value": "0.808", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -0.9733, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -0.8268, -0.8700])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.7765, 1.9612, 2.1436, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.2678, 2.4423, 2.3658, 2.2902, 2.4618,\n 2.6316, 2.5560, 2.4814, 2.6485, 2.8138, 2.7393, 2.9025, 2.8284,\n 2.9897, 2.9161, 3.0754, 3.0022, 2.9299, 2.8583, 3.0151, 2.9439,\n 2.8735, 2.8039, 2.7349, 2.8889, 2.8203, 2.9726, 2.9044, 2.8368,\n 2.9872, 2.9200, 3.0688, 3.2163, 3.3627, 3.5079, 3.6519, 3.7947,\n 3.7265, 3.6590, 3.8002, 3.7330, 3.6664, 3.8061, 3.7399, 3.6742,\n 3.8125, 3.7471, 3.8841, 4.0202, 4.1552, 4.2893, 4.4224, 4.5547,\n 4.6860, 4.8164, 4.7501, 4.8795, 4.8135, 4.9419, 5.0694, 5.1962,\n 5.3220, 5.2560, 5.3810, 5.3153, 5.2501, 5.3740, 5.4971, 5.6195,\n 5.7411, 5.6760, 5.6112, 5.7319, 5.6675, 5.7874, 5.9065, 6.0249,\n 5.9607, 6.0784, 6.0145, 5.9510, 5.8880, 6.0047, 6.1207, 6.2361,\n 6.3509, 6.4650, 6.4019, 6.3392, 6.4526, 6.3902, 6.3283, 6.4409,\n 6.3793, 6.4911, 6.4298, 6.5410, 6.6517, 6.5906, 6.7006, 6.6398,\n 6.7492, 6.8580, 6.7974, 6.7372, 6.8454, 6.9530, 7.0601, 7.1667,\n 7.1065, 7.2125, 7.1527, 7.0932, 7.1985, 7.3034, 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: One economic study will not be the basis of Canada's public policy decisions, but Easton's research does conclusively show that there are economic benefits in the legalization of marijuana.\nHypothesis: Drug legalization has benefits.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 1.0265, 1.2366, 1.4434,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.3389, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.5556, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.6008, 1.7488, 1.8956, 1.8371,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.8953, 1.8385, 1.7823,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.8411, 1.7864, 1.9245,\n 2.0617, 2.0068, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 2.0083,\n 1.9566, 2.0866, 2.2159, 2.1640, 2.2923, 2.2406, 2.1892, 2.1381,\n 2.0873, 2.2140, 2.1634, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 2.1637, 2.1145, 2.2377, 2.3603, 2.3110, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.3580, 2.3098, 2.2618, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.3094, 2.2624, 2.3798, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 5.9297, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.5514, 7.4066,\n 7.5707, 7.7326, 7.5916, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 7.9489, 8.1016, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 8.9355, 8.8192, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.9291, 11.0389, 10.9462, 11.0554, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.1971, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.6597, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 13.0460, 12.9691, 13.0608, 13.1520, 13.0758, 13.0000,\n 13.0910, 13.0157, 13.1063, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Archaeologists have found approximately 30 beautifully preserved mummies in a 4,000 year old Egyptian necropolis which held 53 tombs. Supervisor of Antiquities for Middle Egypt Dr. Abdel-Rahman El-Ayedi's team established his archaeological site in the Faiyum Oasis near the El-Lahun Egyptian pyramid which is just south of Cairo. Besides the mummies, the team found masks, amulets, clay pots and an offering table located in a funerary chapel. The chapel dates back to about 30 BC to 337 AD.\nHypothesis: 30 beautifully preserved mummies have been located in the south of Cairo.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.0461, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "10", + "Fraction of T in Greenlist": "40.0%", + "z-score": "1.73", + "p value": "0.0416", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Gastrointestinal bleeding can happen as an adverse effect of non-steroidal anti-inflammatory drugs such as aspirin or ibuprofen.\nHypothesis: Aspirin prevents gastrointestinal bleeding.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.5175, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.0516, -0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.0479, -0.0956, 0.0476, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.0919, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.6436,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.8958, 9.0134, 8.9285, 9.0453, 8.9612, 8.8778, 8.9940, 9.1094,\n 9.0267, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.6016, 9.5224, 9.6317, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.2790, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.7928,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.4300, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.5235, 11.4525, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A former employee of the company, David Vance of South Portland, said Hooper spent a lot of time on the road, often meeting with customers between Portland and Kittery.\nHypothesis: Hooper is a citizen of Portland.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "17", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "23.5%", + "z-score": "-0.14", + "p value": "0.556", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.7931, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.2532, 7.1591, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.4233, 7.5556, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 7.9724, 7.8859, 7.8003,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.2107, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.3212, 9.2450, 9.1694, 9.0944, 9.2032, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.6214,\n 9.7261, 9.6532, 9.7574, 9.8611, 9.9642, 9.8918, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.8821, 9.8116, 9.9132, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.5286, 10.4603, 10.3923, 10.4893, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Cyprus, divided or not, joins the EU on the 1st of May.\nHypothesis: Cyprus was divided into two parts on May 1.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -0.9078, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -0.7851, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.7318, -0.5726, -0.6222, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.7454, -0.7921,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.6274, -0.6702, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.5991, -0.6402, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 8.8007, 8.6942, 8.8260, 8.9567,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.4088, 9.3140, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.9542, 10.0647, 9.9807, 10.0906, 10.0074,\n 10.1167, 10.2253, 10.3333, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.7006, 10.8051, 10.9091, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.0569, 10.9773, 10.8984, 11.0004, 11.1018, 11.0235, 11.1245, 11.2250,\n 11.3249, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.3910, 11.3150,\n 11.2396, 11.3378, 11.4356, 11.5329, 11.4581, 11.5549, 11.4806, 11.4068,\n 11.3335, 11.4300, 11.5261, 11.4533, 11.5489, 11.6441, 11.5718, 11.5000,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.6411, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: World leaders expressed concern on Thursday that North Korea will quit six-party nuclear disarmament talks and will bolster its nuclear weapons arsenal.\nHypothesis: North Korea says it has a stockpile of nuclear weapons and is building more.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.8%", + "z-score": "1.23", + "p value": "0.109", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.6537, 0.8444, 1.0328,\n 0.9623, 1.1476, 1.0773, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.9110, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.9867, 0.9366,\n 1.0735, 1.0235, 1.1593, 1.2943, 1.2441, 1.3779, 1.3278, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.4570, 1.4087, 1.3607, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.4713, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "124", + "Fraction of T in Greenlist": "62.3%", + "z-score": "12.2", + "p value": "2.68e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.1317, 6.9631, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.1952, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.7150, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 7.9472, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.2147, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.0472, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.3409, 10.4524, 10.5632, 10.4738, 10.5841, 10.4956,\n 10.6052, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.6700, 10.7772,\n 10.8838, 10.9898, 10.9048, 10.8204, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.2389, 11.3402, 11.2602, 11.1807, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.6840, 11.7808, 11.7050, 11.8014, 11.8973, 11.9928, 11.9176,\n 12.0127, 11.9380, 12.0327, 11.9586, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.2794, 12.3718, 12.2992, 12.2271, 12.1554])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In addition to establishing the electoral commission, the laws also concern nationality, a contentious issue since citizenship laws were tightened to exclude one of Gbagbo's main competitors from the 2000 presidential race, former prime minister Alassane Ouattara.\nHypothesis: Gbagbo is a competitor of Ouattara.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -0.8001, -0.8660,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, 0.1879, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.5608, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.8296, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.9658, 1.1140, 1.2611, 1.2060, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.3625, 1.3112,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 1.1447, 1.0954,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.1991, 1.1513, 1.2804, 1.4087, 1.3607, 1.3131, 1.2657, 1.2185,\n 1.3453, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.9659, 1.0890, 1.0444, 1.0000,\n 1.1221, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "45.2%", + "z-score": "6.59", + "p value": "2.21e-11", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415, 3.7808, 4.0825,\n 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140, 4.5033, 4.7556, 4.5556,\n 4.3644, 4.1812, 4.0056, 3.8367, 4.0825, 4.3217, 4.1586, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.7905, 4.0166, 4.2378, 4.0980, 3.9620, 4.1779, 4.0451,\n 3.9158, 4.1265, 4.3333, 4.2064, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426,\n 4.1260, 4.3205, 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461,\n 4.0415, 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.9058, 4.8113,\n 4.9731, 4.8797, 4.7875, 4.9472, 4.8561, 4.7662, 4.6775, 4.8347, 4.9904,\n 4.9023, 4.8154, 4.7296, 4.6448, 4.5611, 4.7140, 4.8655, 5.0156, 4.9322,\n 4.8497, 4.9980, 5.1450, 5.0630, 5.2085, 5.1273, 5.2713, 5.4140, 5.5556,\n 5.4747, 5.3947, 5.3156, 5.4554, 5.3769, 5.2992, 5.2223, 5.1461, 5.0707,\n 5.2086, 5.3455, 5.2705, 5.1962, 5.1225, 5.0496, 4.9774, 5.1123, 5.2463,\n 5.1744, 5.1031, 5.0325, 5.1650, 5.0948, 5.0252, 5.1564, 5.2868, 5.2175,\n 5.3468, 5.4752, 5.6028, 5.5336, 5.4650, 5.5915, 5.5233, 5.6488, 5.5811,\n 5.5138, 5.6383, 5.5714, 5.6949, 5.6285, 5.7511, 5.8730, 5.9941, 5.9279,\n 5.8621, 5.9822, 6.1017, 6.0362, 6.1548, 6.0897, 6.2075, 6.1427, 6.0784,\n 6.1954, 6.1314, 6.2476, 6.1839, 6.1207, 6.2361, 6.1732, 6.2879, 6.2253,\n 6.3392, 6.4526, 6.5653, 6.5029, 6.4409, 6.5528, 6.6642, 6.6024, 6.7132,\n 6.6517, 6.7618, 6.8713, 6.9803, 6.9190, 6.8580, 6.7974, 6.9056, 6.8454,\n 6.7854, 6.7259, 6.6667, 6.6078, 6.7151, 6.8219, 6.7632, 6.7049, 6.6469,\n 6.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Mohandas Karamchand Gandhi never received the Nobel Peace Prize, though he was nominated for it five times between 1937 and 1948.\nHypothesis: Mohandas received the Nobel Prize in 1989.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.2%", + "z-score": "-1.23", + "p value": "0.891", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 1.2472, 1.4968, 1.3926, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.0742, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.9180, 0.8520, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.2566, -1.2950, -1.1667,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.2309])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 1.1793, 1.4757, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.7852, 2.6681, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.4293, 3.6380, 3.8431, 4.0446, 4.2426, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.6904, 4.5847, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.3245, 5.2281, 5.3867, 5.2915, 5.4482, 5.6032, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.6454, 6.7886, 6.6935, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.2240, 9.1414, 9.2554, 9.3686, 9.2867, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.5714, 9.6814, 9.6016, 9.5224, 9.6317, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.6145, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Christian Democrats (CDU) won 35.2% of the vote, or 225 seats, against 34.3% for Chancellor Gerhard Schroeder's Social Democrats (SPD).\nHypothesis: It seems unlikely that there will be a coalition between Gerhard Schroeder's Social Democrats and Angela Merkel's Christian Democratic Union.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.5377, -1.5752, -1.4471, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774, 0.4201, 0.8165,\n 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428, 1.2702, 1.5852, 1.8889,\n 1.7457, 2.0370, 2.3190, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 1.9245,\n 2.1831, 2.0605, 1.9415, 1.8257, 1.7132, 1.9599, 1.8489, 2.0889, 1.9795,\n 1.8728, 2.1054, 2.3333, 2.2269, 2.1229, 2.0211, 1.9215, 2.1412, 2.3570,\n 2.5690, 2.4689, 2.3706, 2.5775, 2.7811, 2.6833, 2.8830, 2.7863, 2.6914,\n 2.5981, 2.5064, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820, 2.5019,\n 2.6778, 2.8518, 3.0237, 2.9433, 2.8638, 3.0330, 3.2004, 3.1211, 3.2863,\n 3.2077, 3.1300, 3.0533, 2.9775, 3.1394, 3.0641, 2.9897, 2.9161, 2.8433,\n 3.0022, 2.9299, 3.0870, 3.0151, 2.9439, 3.0989, 3.2525, 3.1814, 3.1111,\n 3.0415, 2.9726, 3.1236, 3.2733, 3.4217, 3.5689, 3.4995, 3.6452, 3.7897,\n 3.7205, 3.8636, 3.7947, 3.7265, 3.6590, 3.5920, 3.7330, 3.6664, 3.6004,\n 3.5350, 3.4701, 3.6091, 3.5446, 3.6824, 3.6181, 3.5544, 3.6908, 3.8262,\n 3.7626, 3.6995, 3.6369, 3.5748, 3.7084, 3.8411, 3.9729, 3.9107, 3.8490,\n 3.9795, 4.1092, 4.0476, 4.1763, 4.1150, 4.0541, 3.9936, 3.9337, 4.0608,\n 4.0011, 3.9418, 3.8829, 3.8244, 3.9501, 3.8919, 4.0166, 3.9586, 3.9010,\n 4.0247, 4.1477, 4.0901, 4.0330, 3.9762, 3.9198, 4.0415, 4.1624, 4.2827,\n 4.2262, 4.1700, 4.2893, 4.4080, 4.3519, 4.4698, 4.4140, 4.3585, 4.3033,\n 4.2485, 4.3652, 4.3106, 4.2563, 4.2023, 4.1487, 4.2642, 4.2108, 4.3256,\n 4.2723, 4.2193, 4.3333, 4.4468, 4.3938, 4.3412, 4.2889, 4.2369, 4.3492,\n 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Edward VIII became King in January of 1936 and abdicated in December.\nHypothesis: King Edward VIII abdicated in December 1936.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.0999, -1.1677, -0.9258,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.1901, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.2844, 0.4529,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.2111, 0.1684, 0.2940, 0.2513, 0.2089, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.3299, 0.2879, 0.4103, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.8520, 7.7555, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.7327, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.7104, 8.8304, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.3020, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.2790, 10.2029, 10.1273, 10.2310, 10.3341,\n 10.4367, 10.5388, 10.4638, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 10.8505, 10.9480, 10.8770, 10.8064, 10.9034, 10.8333,\n 10.9299, 11.0261, 11.1218, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Australian Guantanamo Bay detainee David Hicks has won British citizenship, opening the door for a possible bid to have him freed from the US detention facility in Cuba. Justice Lawrence Collins of the British High Court has overturned the British Government's refusal of citizenship to Hicks, whose mother was born in England. Justice Collins said the Government had \"no power to withhold or deprive citizenship\". Justice Collins said: \"In my view it would be improper to fail to give assistance which otherwise would have been given, simply because the claimant was believed to be involved in terrorism and has not had any previous connection with this country.\" Hicks' lawyer, Stephen Grosz, said the decision was a breakthrough. He said there was now no reason why Hicks should not enjoy the same protection as the nine other British citizens released without charge from Guantanamo Bay on representations of the British Government.\nHypothesis: Stephen Grosz is the British lawyer of David Hicks.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "18", + "Fraction of T in Greenlist": "28.6%", + "z-score": "0.655", + "p value": "0.256", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.4714, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.2525, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.2717, 9.4087, 9.2828, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.4560, 9.5876, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.7586, 9.8852, 10.0107, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.9304, 9.8237, 9.9469, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.0188, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.0798,\n 11.9863, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.2150, 12.3168,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.5615, 12.4746, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.3422, 14.2640, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Rockweed has been harvested commercially in Nova Scotia since the late 1950's and is currently the most important commercial seaweed in Atlantic Canada.\nHypothesis: Marine vegetation is harvested.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -1.0513, -1.0973, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.1942, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.2445, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.7852, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 2.2576, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.0870, 1.9959, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.1546, 2.3462, 2.5352, 2.7217,\n 2.6354, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.4427, 3.6148,\n 3.7849, 3.9530, 4.1192, 4.2836, 4.1957, 4.1090, 4.2710, 4.4313,\n 4.5899, 4.7469, 4.9023, 4.8154, 4.7296, 4.8830, 5.0350, 4.9497,\n 5.1000, 5.0156, 4.9322, 4.8497, 4.9980, 5.1450, 5.0630, 4.9820,\n 5.1273, 5.2713, 5.4140, 5.3333, 5.4747, 5.6149, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.8757, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 7.0187, 7.1443,\n 7.0658, 7.1904, 7.3143, 7.2363, 7.1590, 7.2818, 7.4039, 7.3271,\n 7.4483, 7.3721, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.3140, 8.4286, 8.5424, 8.4664, 8.3910, 8.5041, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.7688, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.7574, 9.6850, 9.6130, 9.5416, 9.6452, 9.7483,\n 9.8510, 9.7800, 9.8821, 9.9837, 10.0848, 10.1855, 10.1149, 10.2151,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.3730, 10.3038, 10.4021, 10.5000,\n 10.4312, 10.3628, 10.4603, 10.3923, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Pakistan President Pervez Musharraf has ordered security forces to take firm action against rioters following the assassination of opposition leader Benazir Bhutto. The violence has left at least 44 people dead and dozens injured. Mr. Musharraf insisted the measures were to protect people. VOA's Ayaz Gul reports from Islamabad that a bitter dispute has also erupted over how the 54-year-old politician died and who was behind her assassination.\nHypothesis: Musharraf has ordered rioters to take firm action against security forces.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 2.0656, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.4659, 2.7406, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.3566, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.2998, 3.1918, 3.3947,\n 3.2883, 3.4873, 3.3824, 3.5777, 3.7700, 3.6662, 3.8552, 3.7528,\n 3.6522, 3.8376, 3.7383, 3.9208, 4.1008, 4.0024, 3.9056, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 4.2222, 4.1312,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.1957, 4.3580, 4.5186, 4.4313,\n 4.3451, 4.2601, 4.1761, 4.3339, 4.2507, 4.1684, 4.0872, 4.0069,\n 3.9276, 3.8492, 3.7717, 3.9260, 4.0788, 4.0016, 3.9253, 3.8497,\n 3.7750, 3.9254, 3.8512, 3.7778, 3.7051, 3.6332, 3.5620, 3.4915,\n 3.4217, 3.5689, 3.7148, 3.6452, 3.5762, 3.5079, 3.4402, 3.5839,\n 3.5166, 3.4499, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.4058, 3.3420, 3.2788, 3.2161, 3.1539, 3.2918, 3.2299, 3.1685,\n 3.1076, 3.0471, 2.9872, 2.9277, 2.8687, 3.0039, 3.1382, 3.0792,\n 3.0206, 2.9625, 2.9048, 3.0373, 2.9798, 2.9227, 2.8660, 2.8098,\n 2.7539, 2.6984, 2.6433, 2.7735, 2.9029, 2.8478, 2.7930, 2.7386,\n 2.6846, 2.8124, 2.7585, 2.7050, 2.6519, 2.5990, 2.5466, 2.4944,\n 2.4426, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.4099, 2.3603, 2.3110, 2.2620, 2.3835,\n 2.5044, 2.4553, 2.4065, 2.3580, 2.3098, 2.4294, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.1444, 2.0979, 2.2156, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.9589, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.7419, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 8.7986, 8.9355, 9.0711, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.6329, 10.5243, 10.4169,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.6793, 10.7955, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.0746, 11.1860, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.9060, 12.0096, 11.9187, 12.0218, 11.9319,\n 12.0345, 12.1366, 12.0476, 12.1492, 12.2503, 12.3508, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.4746, 12.5732, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.8037, 12.8997, 12.8160, 12.9116, 12.8285,\n 12.9238, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.1746, 13.2668, 13.1878, 13.1094,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.2429, 13.1667,\n 13.2572, 13.1815, 13.1063, 13.1966, 13.2864, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A month after Gov. David A. Paterson dropped his proposal for a soda tax, New York City's health commissioner has written an article advocating \"hefty\" taxes on sodas and sports drinks containing sugar. Such a tax, the article said, could be the biggest boon to public health since tobacco taxes. The commissioner, Dr. Thomas R. Frieden, and Kelly D. Brownell of Yale University, his co-author, argue in the New England Journal of Medicine that a tax of a penny per ounce could reduce consumption by more than 10 percent and raise $1.2 billion a year in New York State alone.\nHypothesis: Michael Bloomberg is the mayor of New York.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -2.3094, -2.3729, -2.4351, -2.4962, -2.1909,\n -1.8935, -1.9599, -2.0250, -1.7408, -1.8074, -1.5323, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.5579, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.5433, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 6.8457, 7.0219, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.3589, 8.2195, 8.3716,\n 8.2353, 8.3859, 8.2525, 8.4017, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.6000, 8.4770, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.0067, 9.1343, 9.2609, 9.1590, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.0984, 10.0029, 9.9085,\n 10.0249, 10.1405, 10.0472, 9.9547, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.2287, 10.1391, 10.2514, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.6481, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.0102, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.4614, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.7000, 11.7980, 11.8956, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.1260, 12.2214, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.6153, 12.7082, 12.6323, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Officials said the Finnish suspect was among the dead but did not provide a motive for the attack.\nHypothesis: Officials said that the suspect, a Finnish citizen, was among the dead.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.0534, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.5695, 0.5203, 0.4714, 0.6108, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.7688, 0.7213, 0.6742, 0.8066, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.9870, 1.1127, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 1.0444, 1.1667,\n 1.1221, 1.0777, 1.1990, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 2.9055, 2.7852, 2.6681, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.3147, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.6571, 4.5461, 4.7281, 4.9075,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.0019, 5.1711,\n 5.0680, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.7812, 5.9386,\n 5.8377, 5.9932, 5.8936, 6.0474, 6.1996, 6.1012, 6.0041, 6.1546,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 6.8819, 7.0211, 7.1591, 7.0662, 6.9743, 6.8834, 6.7937,\n 6.7049, 6.8414, 6.9768, 7.1111, 7.2443, 7.3765, 7.5076, 7.4194,\n 7.3322, 7.2459, 7.3758, 7.5048, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.0882, 8.0042, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.4423, 8.5607, 8.4788, 8.5964, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.3927, 8.5088, 8.4303, 8.3525, 8.2754,\n 8.3906, 8.5052, 8.4286, 8.3526, 8.2772, 8.3910, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.5792, 8.6903, 8.8008, 8.9107, 9.0200, 8.9461,\n 9.0548, 9.1629, 9.2704, 9.3774, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.3708, 9.4761, 9.4042, 9.5089, 9.4375, 9.5416, 9.4707, 9.5743,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 9.9752, 10.0753, 10.0061, 10.1058, 10.2050, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.3628, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Britain agreed to lift by March 31 a 150-mile military protection zone enforced around the islands since Argentina invaded them in 1982.\nHypothesis: The military protection zone around Falklands was lifted.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.2599, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.8371,\n 1.9825, 2.1268, 2.0682, 2.0101, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 2.0642, 2.0078, 1.9518, 2.0907, 2.0349, 1.9795, 1.9245,\n 1.8699, 1.8157, 1.9524, 1.8983, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.8821, 1.8317, 1.9599,\n 2.0873, 2.0369, 2.1634, 2.1131, 2.0631, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.8175, 1.9419, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.7780, 1.7310, 1.6843, 1.6378, 1.7592, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.2106, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.7333, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.1176, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 8.8544,\n 8.9752, 8.8860, 9.0060, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.5054,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.4812, 9.5931, 9.7044, 9.8150,\n 9.9249, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.2486, 10.1695, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.5366, 10.4594, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.8170, 10.9176, 10.8421, 10.9422, 10.8673, 10.9669,\n 10.8925, 10.9917, 11.0904, 11.0165, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The ferry owner PT Nur Budi's spokesman blamed Indonesian port authorities for the tragedy. \"The passenger capacity of the ferry is 205 people but the port administrator accepted more passengers as they thought it was possible,\" he said. The National Meteorological and Geophysics Agency, however, had published and raised an alert signal about high waves on Friday. It specifically stated that \"Saturday 10th and Sunday 11th, Indonesian waters would have witnessed storm force waves,\" but despite the dire warnings KM Teratai set for the seas.\nHypothesis: An Indonesian ferry with 300 passengers sank.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.1295, -0.1721, -0.2146, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "202", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "60.9%", + "z-score": "11.8", + "p value": "2.46e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.9511, 6.8483, 6.7469, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.8849, 6.7886, 6.9305, 6.8354,\n 6.7414, 6.6486, 6.7890, 6.9282, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.0773, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.4513, 9.3686, 9.4812, 9.3993, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.7619, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.5625, 10.4858, 10.5884, 10.6904,\n 10.7920, 10.7159, 10.6404, 10.5654, 10.6665, 10.7671, 10.6927, 10.7928,\n 10.8925, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.7347, 11.8280, 11.7576,\n 11.8504, 11.7804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Tension and battles between Greek police and anarchist demonstrators took place in the centre of Athens, Greece, during the anti-war demonstration of the 4th European Social Forum which is taking place in the Greek capital, from 4 to 7 of May 2006. The march of the approximately 1,000 anarchists ended with clashes between groups of anarchists and police. Riot police used tear gas, while a branch of a Greek bank, a fast-food store and around 50 shop windows in central Athens were damaged.\nHypothesis: The riots in Greece started on December 6.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.2039, -1.0178, -1.0719, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.5855, -0.6321, -0.4845, -0.3380, -0.3849,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 3.3947, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.4878,\n 7.6339, 7.7784, 7.6681, 7.5593, 7.7026, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.4444, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.3326, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.6920, 10.7987, 10.9048, 10.8204, 10.9259, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.2789, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.4411, 11.5414, 11.6412, 11.5613, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.5234, 11.6220, 11.7200, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.9534, 12.0493, 11.9730, 12.0685, 12.1635, 12.2581,\n 12.1825, 12.2767, 12.3705, 12.4638, 12.3888, 12.4818, 12.4074, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Libya's case against Britain and the US concerns the dispute over their demand for extradition of Libyans charged with blowing up a Pan Am jet over Lockerbie in 1988.\nHypothesis: One case involved the extradition of Libyan suspects in the Pan Am Lockerbie bombing.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.0998, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -0.9711, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.1929, -1.2309, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.1137, 6.3254, 6.5320,\n 6.7338, 6.9310, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.9567, 9.1084, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.6743, 9.8150,\n 9.9540, 10.0915, 9.9454, 9.8020, 9.6612, 9.7989, 9.6612, 9.5258,\n 9.6630, 9.5304, 9.4000, 9.2717, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.0791, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.7164, 10.8379, 10.7215, 10.8423,\n 10.7277, 10.8477, 10.7348, 10.8542, 10.9727, 11.0902, 11.2069, 11.3228,\n 11.4378, 11.3276, 11.2187, 11.3333, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.2314, 12.3377, 12.2360, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.3524, 12.4567, 12.5604, 12.6635, 12.7660, 12.6684, 12.5717, 12.6739,\n 12.5782, 12.6800, 12.5853, 12.6867, 12.7875, 12.8877, 12.7943, 12.8942,\n 12.8017, 12.9011, 13.0000, 13.0984, 13.1962, 13.1050, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.8756,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.3412, 14.2539,\n 14.1673, 14.2584, 14.1725, 14.2633, 14.1781, 14.2686, 14.3587, 14.4484,\n 14.3642, 14.4536, 14.5426, 14.4591, 14.5479, 14.4651, 14.5535, 14.4714,\n 14.3897, 14.3087, 14.3970, 14.3166, 14.4046, 14.3248, 14.4126, 14.3333,\n 14.4208, 14.5080, 14.4294, 14.3513, 14.4382, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Argentina sought help from Britain on its privatization program and encouraged British investment.\nHypothesis: Argentina sought UK expertise on privatization and agriculture.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.0512, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.1239,\n -1.1669, -1.0235, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.6057, -1.4713, -1.3377, -1.3771, -1.4162, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.1007, -1.1399, -1.1790, -1.0499, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.8990,\n 4.4264, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 3.9620, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.3301,\n 4.2251, 4.4061, 4.3026, 4.2008, 4.3788, 4.2784, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.1978, 5.3605, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.6986, 5.6032, 5.5090, 5.6622,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.8819, 7.0211, 6.9282, 7.0662, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.8304, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.9773, 10.8984, 11.0004, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.3249, 11.4244, 11.5234, 11.4459, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.3888, 12.4818, 12.5745, 12.5000,\n 12.5923, 12.6841, 12.6102, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: \"Instead of action on crime, we got the federal long gun registry, which became a bloated bureaucratic nightmare to responsible hunters, farmers and rural Canadians. It cost taxpayers some CA$2 billion and it hasn't done a thing to reduce gun crime.\" said Harper. The Conservatives have provided amnesty for unregistered gun owners. At this time there is no legislation set before the House of Commons. Conservative Garry Breitkreuz from Saskatchewan tabled the bill killing the long-gun registry.\nHypothesis: Garry Breitkreuz is a member of the Conservative Party.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "180", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "23.3%", + "z-score": "-0.516", + "p value": "0.697", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.4124, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.0094, -0.8372, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.7396, -0.7833, -0.8268, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.3760, 7.2488, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.3217, 8.4560, 8.3521, 8.2496,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.7629, 8.6667, 8.7927, 8.6976, 8.6035, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.2435, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.6757, 9.5931, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 10.1692, 10.0881, 10.0076, 10.1151,\n 10.0353, 10.1423, 10.0631, 9.9846, 9.9067, 10.0131, 10.1189, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.5388, 10.4638, 10.3893, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.9178, 10.8444, 10.7714, 10.8702, 10.7978, 10.8961,\n 10.8241, 10.7527, 10.6817, 10.7795, 10.8770, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.2171, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The researchers in the latest study fed one group of mice a diet in which 60 percent of calories came from fat. The diet started when the mice, all males, were 1 year old, which is middle-age in mouse longevity. As expected, the mice soon developed signs of impending diabetes, with grossly enlarged livers, and started to die much sooner than mice fed a standard diet.\nHypothesis: At the age of one year, male mice were fed with a diet in which 60 percent of calories came from fat.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -1.9118, -1.9711, -2.0294, -1.7889, -1.8481, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.3554, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.5756, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.0851, -3.1229, -2.9659, -3.0039, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.2206, -3.2567, -3.1071, -3.1433, -2.9950, -3.0315, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.8174, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.0094, -0.8372, -0.8889, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.1615, 0.0000, -0.0534, 0.1063, 0.0529, 0.0000,\n 0.1575, 0.1045, 0.2603, 0.4148, 0.3615, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.7089, 0.8575, 1.0050, 1.1514, 1.2968, 1.4410, 1.5842,\n 1.7264, 1.6710, 1.8119, 1.9518, 1.8962, 1.8411, 1.9795, 1.9245,\n 2.0617, 2.0068, 1.9524, 2.0881, 2.0338, 2.1685, 2.3022, 2.4351,\n 2.5672, 2.6984, 2.8288, 2.7735, 2.9029, 3.0315, 2.9761, 2.9212,\n 2.8666, 2.9938, 3.1203, 3.2460, 3.3710, 3.4953, 3.6188, 3.7417,\n 3.8638, 3.9853, 3.9294, 4.0501, 4.1700, 4.2893, 4.2334, 4.1779,\n 4.1226, 4.0678, 4.0132, 4.1312, 4.0768, 4.0228, 3.9691, 4.0860,\n 4.0325, 4.1487, 4.0953, 4.2108, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.2805, 4.3938, 4.5066, 4.6188, 4.5659, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The soldiers, who were said to have been wearing Arab headdress, were accused of firing at Iraqi police when stopped at a road block.\nHypothesis: The soldiers were driving a civilian car and were dressed in civilian clothes when a shooting took place between them and Iraqi patrols.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.4907, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.4812, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 0.9847,\n 1.1628, 1.0954, 1.0289, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, 0.1063, 0.0529, 0.2108,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.5680, 0.5143, 0.6660, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 5.7735,\n 6.0212, 6.2598, 6.4902, 6.7132, 6.4254, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.5378, 7.7232, 7.9048, 8.0829, 8.2577, 8.0546, 8.2281, 8.3984,\n 8.5659, 8.7305, 8.5404, 8.3557, 8.5206, 8.6828, 8.8426, 9.0000,\n 8.8252, 8.9815, 9.1355, 9.2874, 9.4373, 9.2710, 9.4198, 9.5668,\n 9.7119, 9.8553, 9.9969, 9.8387, 9.9795, 10.1187, 10.2562, 10.1036,\n 9.9540, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.4667, 10.3314, 10.1983, 10.3288, 10.4581, 10.5862,\n 10.7131, 10.5838, 10.7099, 10.8350, 10.9589, 10.8328, 10.7084, 10.5859,\n 10.7098, 10.8327, 10.9546, 11.0755, 10.9559, 11.0761, 11.1954, 11.3137,\n 11.1966, 11.0810, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.3228,\n 11.4378, 11.5519, 11.6652, 11.5556, 11.4471, 11.3399, 11.4531, 11.5655,\n 11.6772, 11.7881, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.1244, 12.2314, 12.3377, 12.4434, 12.5485, 12.4471, 12.5517, 12.6557,\n 12.7590, 12.8618, 12.7622, 12.6635, 12.7660, 12.8679, 12.9692, 13.0699,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.3710, 13.2753, 13.3737, 13.2791,\n 13.3770, 13.4745, 13.5714, 13.6679, 13.5746, 13.6707, 13.7663, 13.8615,\n 13.9561, 13.8642, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.0582,\n 14.1510, 14.2433, 14.3352, 14.4267, 14.5178, 14.4292, 14.3412, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 14.8849, 14.7998, 14.7152, 14.8034, 14.8912, 14.9786, 15.0657, 14.9821,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.2446, 15.1625, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The United States would like to see U.N. weapons inspectors return to Iraq providing the Iraqis take concrete, affirmative and demonstrable actions \"to show full co-operation\" Clinton said.\nHypothesis: U.N. weapons inspectors could stay in Iraq, Clinton said.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "79", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.195", + "p value": "0.577", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "71", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "42.3%", + "z-score": "3.36", + "p value": "0.000393", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.6679, 2.5621, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.6833, 2.5873, 2.7863, 2.6914, 2.8868,\n 3.0793, 3.2691, 3.1741, 3.3607, 3.2667, 3.1743, 3.3574])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: On 12 August, the San Carlos Battalion came across mines placed in their path and one soldier was killed while two were seriously injured. Meanwhile on 10 August, urban commandos took a patrol car by surprise and dropped a grenade inside the car, injuring four and partially destroying the vehicle.\nHypothesis: A patrol car was attacked by the San Carlos Battalion.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.0949, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.8791, 2.0870, 1.9959, 1.9064, 2.1094, 2.3094,\n 2.2200, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.5352, 2.7217,\n 2.9057, 2.8189, 2.7333, 2.9140, 3.0924, 3.0071, 3.1829, 3.0984,\n 3.0151, 3.1879, 3.3587, 3.2757, 3.4442, 3.6109, 3.5282, 3.4466,\n 3.6107, 3.7732, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.5355,\n 3.4586, 3.3826, 3.5396, 3.4641, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.3243, 3.2525, 3.4047, 3.5556, 3.4839, 3.4130, 3.3428, 3.2733,\n 3.2044, 3.3526, 3.2841, 3.2163, 3.3627, 3.2953, 3.2285, 3.1623,\n 3.0967, 3.0317, 3.1755, 3.1109, 3.2533, 3.3947, 3.3301, 3.2660,\n 3.2025, 3.1395, 3.0770, 3.2161, 3.1539, 3.0923, 3.2299, 3.1685,\n 3.1076, 3.0471, 2.9872, 2.9277, 3.0632, 3.0039, 3.1382, 3.2717,\n 3.2124, 3.1536, 3.0952, 3.0373, 2.9798, 3.1113, 3.0540, 2.9971,\n 3.1273, 3.0706, 3.0143, 2.9584, 2.9029, 2.8478, 2.9761, 2.9212,\n 3.0486, 3.1753, 3.1203, 3.0657, 3.0114, 2.9575, 2.9040, 3.0290,\n 2.9756, 2.9225, 3.0464, 2.9935, 2.9410, 2.8887, 2.8368, 2.7852,\n 2.9076, 2.8561, 2.9776, 3.0984, 3.0469, 2.9957, 2.9448, 2.8943,\n 2.8440, 2.9633, 2.9132, 2.8633, 2.9817, 2.9320, 2.8825, 2.8333,\n 2.7844, 2.7358, 2.8528, 2.8043, 2.9205, 3.0363, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.1152, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.4225, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.8204, 10.7367, 10.6537, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.8051, 10.7242, 10.8282, 10.9317, 10.8515, 10.9545,\n 11.0569, 11.1588, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.6016, 11.7000, 11.7980, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.6840, 11.7808, 11.8771, 11.9730, 11.8973, 11.8221, 11.9176,\n 12.0127, 11.9380, 12.0327, 12.1270, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Bomb-sniffing dogs were brought to Rodriguez's Mulberry St. apartment.\nHypothesis: Bomb-sniffing dogs were estimated at Rodriguez's Mulberry St. apartment.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.2074, -0.0516, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 5.6667,\n 5.8560, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.1143, 7.2667, 7.1525, 7.3033, 7.1909, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.0822, 8.2178, 8.1152, 8.2496,\n 8.1483, 8.2816, 8.1816, 8.3138, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 8.9178, 9.0419, 8.9469,\n 9.0702, 8.9763, 9.0987, 9.0057, 9.1273, 9.0354, 9.1561, 9.2760,\n 9.3951, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.7890, 9.9015,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.4652, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.5393, 10.6439, 10.7480, 10.8515, 10.7719,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.1702, 11.2698, 11.1933, 11.2924, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.4356, 11.3608, 11.4581, 11.5549, 11.6514, 11.7473,\n 11.8429, 11.7687, 11.8638, 11.7901, 11.8849, 11.8117, 11.9060, 11.8333,\n 11.9273, 11.8551, 11.9487, 12.0419, 12.1347, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Startling new research into mobile phones claims they may reduce a man's sperm count by up to 30%.\nHypothesis: Male fertility may be affected by use of a mobile phones.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.6702, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.1493, 9.0401, 8.9324, 8.8260, 8.9567,\n 9.0863, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.5556, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.0698, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.6534, 10.7635, 10.8729, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.1435, 11.2493, 11.1621,\n 11.2674, 11.3721, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 11.8336, 11.9341, 11.8503, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.0008, 12.0990, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.6130, 12.5336,\n 12.6283, 12.5495, 12.6439, 12.7378, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.7082, 12.6323, 12.5568, 12.6494, 12.7416, 12.6667,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 13.0477, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Workers at the Lufthansa Technik Automotive plant in Co Dublin are due to vote today on changes to work practices being sought by the company's management. The aircraft engine maintenace firm has warned that rejection of the proposals would threaten a planned \u20ac17230m investment in the Rathcoole plant and put jobs at risk. LTA says it is not seeking any lay-offs or pay cuts and the planned investment would guarantee the future of the plant for the next 15 years. The work-practice changes are being sought following the collapse of Labour Relations Commission talks on the matter.\nHypothesis: Lufthansa Technik Automotive fires 30 workers.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 1.0835, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.3944, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.2603, 0.4148, 0.3615, 0.5143, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.4481, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.7878, 0.9165, 0.8704,\n 0.9981, 1.1251, 1.0788, 1.0328, 0.9870, 1.1127, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.3933, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 7.7426, 7.8928,\n 7.7710, 7.6512, 7.5333, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.5661, 7.6995,\n 7.6064, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.0741,\n 7.9839, 7.8948, 7.8065, 7.9336, 7.8463, 7.7598, 7.6742, 7.5895,\n 7.7155, 7.6315, 7.5484, 7.6734, 7.7976, 7.7152, 7.6335, 7.5526,\n 7.4724, 7.5955, 7.5161, 7.4373, 7.3592, 7.4813, 7.6026, 7.5251,\n 7.6456, 7.5687, 7.4924, 7.6120, 7.5364, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.9729, 7.8988, 7.8253, 7.7524, 7.6800,\n 7.6082, 7.5369, 7.6517, 7.5809, 7.5106, 7.6246, 7.7380, 7.8507,\n 7.9628, 8.0742, 8.1851, 8.1150, 8.0455, 8.1556, 8.2652, 8.1960,\n 8.3050, 8.2362, 8.3446, 8.4523, 8.5595, 8.4911, 8.5978, 8.7039,\n 8.8094, 8.7414, 8.6738, 8.6066, 8.5399, 8.4736, 8.4078, 8.3423,\n 8.4471, 8.3820, 8.3173, 8.2531, 8.1892, 8.2933, 8.2298, 8.3333,\n 8.4364, 8.3732, 8.4757, 8.4128, 8.5148, 8.6164, 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: It rewrites the rules of global trade, established by the General Agreement on Tariffs and Trade, or GATT, in 1947, and modified in multiple rounds of negotiations since then.\nHypothesis: GATT was formed in 1947.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.6330,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.6449, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -0.8716, -0.6809, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.9115, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.0215,\n -1.0612, -1.1007, -0.9711, -0.8422, -0.8819, -0.7539, -0.7937, -0.6667,\n -0.7065, -0.7461, -0.6202, -0.4949, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 7.0000, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.2874, 9.1925, 9.3140, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.6566, 9.5668, 9.4778, 9.3897,\n 9.5057, 9.6210, 9.7356, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.3683, 10.4769, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.7367, 10.6537, 10.7594, 10.8644,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.4411, 11.5414, 11.6412, 11.5613, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.7000, 11.7980, 11.8956, 11.8176, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 11.9928, 12.0878,\n 12.1825, 12.1073, 12.2016, 12.2954, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Vice President Dick Cheney on Tuesday hurled an obscenity on the Senate floor to punctuate an angry exchange with Vermont Sen. Patrick Leahy as all senators gathered for their annual photo.\nHypothesis: Cheney cursed at Sen. Patrick Leahy.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.9215, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.7002, 1.9064, 2.1094, 2.0207,\n 1.9335, 2.1320, 2.3276, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 2.2226, 2.1436, 2.0656,\n 2.2453, 2.4228, 2.3448, 2.2678, 2.1918, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.4077, 2.3349, 2.2629, 2.4286, 2.5927,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.2405, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.3552, 2.5117, 2.4444, 2.3779, 2.3120, 2.4660, 2.4004,\n 2.3354, 2.2711, 2.2074, 2.1442, 2.0817, 2.0197, 1.9582, 1.8974,\n 1.8370, 1.7772, 1.9261, 1.8665, 1.8074, 1.9545, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.7566, 1.7018, 1.6473, 1.7864, 1.9245,\n 1.8699, 1.8157, 1.9524, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.9540, 1.9013, 1.8490, 1.9813, 2.1128, 2.0604, 2.0083,\n 2.1386, 2.2680, 2.2159, 2.1640, 2.1125, 2.0613, 2.0105, 2.1381,\n 2.0873, 2.0369, 1.9868, 2.1131, 2.2387, 2.1884, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 2.0656, 2.0170, 1.9686, 1.9206, 2.0430,\n 1.9950, 1.9473, 1.8999, 2.0212, 1.9738, 1.9267, 2.0470, 2.1667,\n 2.1195, 2.2384, 2.1913, 2.1444, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 5.0186, 4.8742, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 8.0042,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.2146, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.4101, 11.3204, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.5632, 11.6667, 11.7696, 11.8719, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.1893, 12.2891, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.0185, 13.1129, 13.2068, 13.3002, 13.3933, 13.4859, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.1482, 14.2367, 14.3248, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.6812, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In the May 2005 general election Michael Howard failed to unseat the Labour Government, although the Conservatives did gain 33 seats, playing the most significant role in reducing Labour's majority from 167 to 66.\nHypothesis: In the May 2005 general election Conservatives got 33 seats.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.8543, 1.7765, 1.9612, 1.8838, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.9149, 2.0785, 2.2405, 2.1723, 2.3324, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.8074, 1.7488, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.8091, 1.7522, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.8676, 1.8119, 1.7566, 1.8962, 1.8411, 1.9795, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.2159, 2.1640, 2.1125, 2.2406, 2.3679, 2.4944,\n 2.4426, 2.5683, 2.5166, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.4065, 2.5265, 2.4778, 2.4294, 2.5483, 2.6667,\n 2.7844, 2.7358, 2.8528, 2.8043, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.4550, 6.6398, 6.8214, 7.0000,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.7250, 8.6000, 8.4770, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 9.0711, 9.2055, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.0535, 9.9469, 9.8416, 9.9640,\n 10.0855, 9.9817, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.4745,\n 10.5909, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.3222, 11.2268,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.6971, 11.8018, 11.7108, 11.8151, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.0476, 12.1492, 12.2503, 12.3508, 12.4508, 12.5503,\n 12.4625, 12.3754, 12.2891, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.8881, 12.8037, 12.8997, 12.8160, 12.9116, 13.0067,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.5985, 13.6896, 13.6091, 13.6999, 13.6201,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.7442, 13.8333,\n 13.7559, 13.6789, 13.7679, 13.6914, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Norwegian Newspaper Corpus is a large and self-expanding corpus of Norwegian newspaper texts. The collection of this dynamic and continually growing corpus began in 1998.\nHypothesis: Dagbladet is a Norwegian newspaper.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.9631, 1.1375, 1.0719, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 1.2111, 1.3700, 1.3093,\n 1.4664, 1.6222, 1.5613, 1.5010, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.3373, 1.2808, 1.4289,\n 1.5758, 1.5191, 1.6646, 1.8091, 1.7522, 1.6958, 1.8385, 1.7823,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.8157, 1.9524, 1.8983, 2.0338, 2.1685, 2.1143, 2.0605,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.1656, 2.1128, 2.0604, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.9837, 2.1125, 2.0613, 2.1892, 2.3163,\n 2.2650, 2.2140, 2.3400, 2.2892, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.2132, 2.1637, 2.1145, 2.0656, 2.0170, 2.1398, 2.2620, 2.2133,\n 2.3346, 2.4553, 2.4065, 2.3580, 2.4778, 2.4294, 2.5483, 2.5000,\n 2.4520, 2.4042, 2.3567, 2.3094, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.7924, 1.0445, 0.9467, 1.1918, 1.4317, 1.6667,\n 1.5671, 1.7963, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.0870, 1.9959, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.4163, 2.6098, 2.8006, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 3.0984,\n 3.2717, 3.4429, 3.6122, 3.7796, 3.9452, 3.8600, 3.7758, 3.9389,\n 3.8555, 3.7732, 3.9340, 3.8523, 4.0112, 3.9302, 3.8503, 4.0069,\n 3.9276, 4.0825, 4.2359, 4.3879, 4.5384, 4.6876, 4.8355, 4.7556,\n 4.6765, 4.8226, 4.9675, 5.1111, 5.2535, 5.1745, 5.3156, 5.4554,\n 5.5942, 5.5155, 5.6530, 5.7894, 5.9247, 6.0590, 6.1923, 6.3246,\n 6.4558, 6.5861, 6.5072, 6.4291, 6.3517, 6.4807, 6.6089, 6.7361,\n 6.8624, 6.7854, 6.7090, 6.8343, 6.7585, 6.8828, 6.8076, 6.7330,\n 6.8564, 6.7823, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.7831, 7.7096, 7.8253, 7.7524, 7.6800,\n 7.7949, 7.9091, 8.0227, 7.9507, 8.0636, 7.9921, 8.1043, 8.0333,\n 7.9628, 7.8928, 8.0042, 8.1150, 8.0455, 8.1556, 8.0865, 8.1960,\n 8.3050, 8.2362, 8.1679, 8.2762, 8.3840, 8.3161, 8.4232, 8.3557,\n 8.2887, 8.2221, 8.3286, 8.2624, 8.3683, 8.3024, 8.4078, 8.5126,\n 8.4471, 8.3820, 8.3173, 8.4215, 8.3572, 8.4608, 8.3969, 8.5000,\n 8.6026, 8.5390, 8.4757, 8.5778, 8.6794, 8.6164, 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Dutch, who ruled Indonesia until 1949, called the city of Jakarta Batavia.\nHypothesis: Formerly ( until 1949 ) Batavia, Jakarta is largest city and capital of Indonesia.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.6880, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.9512, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.0598, 1.0105, 0.9615, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.9062, 1.0328, 1.1587, 1.1127, 1.2377, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.1667,\n 1.2883, 1.4093, 1.5298, 1.4846, 1.4397, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.6765, 2.5775, 2.4804, 2.3851, 2.2916, 2.4930, 2.6914, 2.8868,\n 3.0793, 2.9848, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 2.7456, 2.9231, 3.0984,\n 3.0151, 2.9329, 2.8518, 2.7717, 2.6928, 2.8638, 3.0330, 2.9542,\n 3.1211, 3.0429, 2.9656, 2.8893, 2.8138, 2.9775, 3.1394, 3.2998,\n 3.4586, 3.3826, 3.5396, 3.4641, 3.3895, 3.5443, 3.6977, 3.6233,\n 3.5496, 3.4768, 3.4047, 3.3333, 3.2627, 3.1928, 3.3428, 3.4915,\n 3.6389, 3.5689, 3.4995, 3.6452, 3.7897, 3.9331, 4.0753, 4.2164,\n 4.3564, 4.2862, 4.2167, 4.3552, 4.2861, 4.2176, 4.3547, 4.4907,\n 4.4225, 4.3548, 4.2877, 4.2212, 4.1552, 4.2893, 4.4224, 4.3566,\n 4.4887, 4.4233, 4.3583, 4.2940, 4.2301, 4.3605, 4.4901, 4.6188,\n 4.7467, 4.6826, 4.8095, 4.7458, 4.6825, 4.8083, 4.9333, 4.8702,\n 4.8076, 4.7455, 4.6838, 4.6225, 4.5617, 4.5013, 4.6245, 4.7469,\n 4.6867, 4.6268, 4.5674, 4.5083, 4.4497, 4.5707, 4.6911, 4.6325,\n 4.7520, 4.6938, 4.6359, 4.5783, 4.5212, 4.6395, 4.7572, 4.8742,\n 4.9906, 4.9333, 5.0489, 4.9918, 4.9351, 5.0499, 5.1642, 5.1075,\n 5.0513, 4.9953, 4.9397, 4.8845, 4.8295, 4.7749, 4.8877, 5.0000,\n 5.1117, 5.0571, 5.0027, 5.1137, 5.2241, 5.3340, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The name for the newest James Bond film has been announced today. The 22nd film, previously known only as \"Bond 22\", will be called \"Quantum of Solace\". EON Productions who are producing the film made the announcement today at Pinewood Studios, where production for the film has been under way since last year. The name of the film was inspired by a short story (of the same name) from For Your Eyes Only by Bond creator, Ian Fleming.\nHypothesis: James Bond was created by Ian Fleming.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "34.7%", + "z-score": "2.24", + "p value": "0.0125", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 1.2421, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.6667, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.6908, 1.8677, 2.0426, 2.2156,\n 2.3868, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.4286, 2.3570,\n 2.2862, 2.4495, 2.3791, 2.3094, 2.2405])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "167", + "Fraction of T in Greenlist": "83.9%", + "z-score": "19.2", + "p value": "2.04e-82", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.3485,\n 7.5378, 7.7232, 7.9048, 7.6980, 7.4983, 7.3054, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.8360, 7.6594, 7.8320, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.6676, 9.8072, 9.9454, 10.0820, 10.2172, 10.3510, 10.2093, 10.3423,\n 10.4739, 10.3358, 10.4667, 10.5963, 10.7246, 10.8518, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.4599, 11.3308, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.6743, 11.7907, 11.9062, 12.0208,\n 12.1346, 12.2474, 12.3595, 12.4708, 12.5812, 12.4622, 12.5723, 12.6815,\n 12.7900, 12.8978, 13.0048, 13.1111, 13.2167, 13.3217, 13.2067, 13.3113,\n 13.4152, 13.5185, 13.6211, 13.7230, 13.8244, 13.9251, 14.0253, 13.9140,\n 14.0139, 14.1131, 14.2118, 14.3099, 14.4075, 14.5045, 14.6010, 14.6969,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 15.0624, 15.1556, 15.2483,\n 15.3405, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.6911, 15.7809,\n 15.8702, 15.9591, 15.8571, 15.9459, 16.0342, 16.1220, 16.2095, 16.2966,\n 16.3833, 16.4696, 16.5555, 16.4561, 16.5418, 16.6272, 16.7122, 16.7968,\n 16.8811, 16.9650, 17.0485, 17.1317, 17.0348, 17.1178, 17.2005, 17.2829,\n 17.3649, 17.4466, 17.5280, 17.6090, 17.6897, 17.5951, 17.6756, 17.7559,\n 17.8359, 17.9155, 17.9949, 18.0739, 18.1527, 18.2311, 18.1386, 18.2169,\n 18.2949, 18.3727, 18.4502, 18.5273, 18.6043, 18.6809, 18.7572, 18.6667,\n 18.7429, 18.8189, 18.8946, 18.9701, 19.0453, 19.1202, 19.1949])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The gastric bypass operation, also known as stomach stapling, has become the most common surgical procedure for treating obesity.\nHypothesis: Obesity is medically treated.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.9316, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 7.9115, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.3164, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.4868, 8.6102, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.0134, 8.9285, 8.8443, 8.7610, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.4596, 10.5642, 10.4852, 10.4067,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.4638, 10.5654, 10.4909, 10.5921, 10.5181, 10.4447,\n 10.5453, 10.6455, 10.5725, 10.6722, 10.7714, 10.6990, 10.6271, 10.5556,\n 10.6544, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.1667,\n 11.2624, 11.3577, 11.4525, 11.3820, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The cost of the consumer of the United States fell in June.\nHypothesis: U.S. consumer spending dived in June.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.9396, 1.1333, 1.0596, 1.2501, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.6983, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.0284, 0.9759, 1.1183, 1.0659, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.1905, 1.1390, 1.0879, 1.0371, 1.1746, 1.1239,\n 1.2603, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.2285, 1.3608, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.2657, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.0328, 1.1587, 1.2839, 1.4084, 1.3620,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.7242, 5.8835, 6.0410, 5.9386,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.6454, 6.5504, 6.4566, 6.5997,\n 6.7414, 6.8819, 6.7890, 6.9282, 7.0662, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.3333, 7.2443, 7.3765, 7.2884, 7.4194,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.8463, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.2107, 8.1266, 8.0434, 8.1650,\n 8.2858, 8.4057, 8.3231, 8.4423, 8.5607, 8.4788, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 9.0000, 8.9221, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.1357, 9.2450, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.6322, 9.7380, 9.8433, 9.7688, 9.6948, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.0371, 10.1398, 10.0668, 10.1690, 10.0965,\n 10.0245, 9.9531, 10.0547, 10.1558, 10.2565, 10.3566, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.8064, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The former leader of Iraq was rushed to hospital last Sunday after refusing to eat for sixteen days. But according to news agencies, he has ended the hunger strike by eating lunch at the court in Baghdad. \"Saddam ate beef and rice and cola with bread which he brought from hospital,\" one source told Reuters news agency. He was fasting with three co-defendants, and they were demanding more security for their defence lawyers, three of whom have been murdered.\nHypothesis: Some Australians have fasted to protest.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "25.8%", + "z-score": "0.229", + "p value": "0.409", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, 0.0000, -0.0516, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.2289])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.3644, 4.1812, 4.0056, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 3.9620, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.0553, 6.9511, 6.8483, 6.7469, 6.8931,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.7555, 7.6603, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 7.9839, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.3495, 9.2697, 9.3810, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.5840, 9.5066, 9.4299, 9.3537, 9.4619, 9.3863, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.5485, 9.6532, 9.5808, 9.5089, 9.6130, 9.7167, 9.6452, 9.5743,\n 9.6774, 9.7800, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.0753, 10.0061, 9.9374, 10.0371, 9.9687, 9.9008, 9.8333,\n 9.9325, 10.0312, 10.1295, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Sonia Gandhi can be defeated in the next elections in India by BJP.\nHypothesis: Sonia Gandhi is defeated by BJP.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.2%", + "z-score": "-2.22", + "p value": "0.987", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -1.9262, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.1685, -2.2083, -2.0605,\n -2.1004, -2.1401, -1.9941, -2.0339, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -1.8428, -1.8821, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.0799, -2.1167, -2.1532, -2.0212, -2.0578, -2.0943, -2.1306, -2.0000,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.2106, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.5514, 6.4501, 6.3502, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.0296, 6.9451, 7.0759, 6.9923, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.5484, 7.6734, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.6758, 7.5955, 7.5161, 7.6383, 7.5595, 7.6808, 7.8014, 7.7232,\n 7.8429, 7.9619, 7.8842, 8.0024, 7.9253, 8.0427, 7.9663, 7.8905,\n 7.8153, 7.9318, 7.8571, 7.9729, 8.0880, 8.2024, 8.3162, 8.4293,\n 8.3550, 8.4674, 8.5792, 8.5054, 8.4322, 8.5433, 8.4706, 8.3984,\n 8.3268, 8.4371, 8.3660, 8.4757, 8.5848, 8.5141, 8.6226, 8.7305,\n 8.6603, 8.7676, 8.6978, 8.8045, 8.7351, 8.6662, 8.5978, 8.7039,\n 8.6359, 8.7414, 8.8464, 8.9509, 9.0549, 9.1584, 9.0906, 9.1936,\n 9.2960, 9.2287, 9.1617, 9.2637, 9.1971, 9.1310, 9.0653, 9.1667,\n 9.1013, 9.2022, 9.3026, 9.4026, 9.5021, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Israeli Prime Minister Ariel Sharon has said that Mahmoud Abbas is a man that Israel can do business with.\nHypothesis: Palestinian leader, Mahmoud Abbas, may be someone Israel can talk with.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.1390, -1.1825, -1.2257, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461, 4.0415,\n 3.9386, 4.1219, 4.3026, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.0395, 4.9472, 5.1051, 5.2614, 5.1698,\n 5.0795, 5.2338, 5.3865, 5.2970, 5.2086, 5.3594, 5.2719, 5.4212,\n 5.5690, 5.7155, 5.6285, 5.7735, 5.9172, 5.8310, 5.7457, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.0000, 5.9171, 6.0553, 6.1924, 6.1101,\n 6.0287, 5.9481, 5.8684, 6.0038, 6.1382, 6.0590, 5.9806, 6.1137,\n 6.0359, 6.1680, 6.2990, 6.4291, 6.3517, 6.4807, 6.6089, 6.5320,\n 6.4558, 6.5828, 6.5072, 6.6332, 6.7585, 6.6833, 6.6088, 6.7330,\n 6.8564, 6.7823, 6.7089, 6.8313, 6.9529, 6.8799, 6.8075, 6.9282,\n 6.8563, 6.9762, 7.0952, 7.2136, 7.1421, 7.2596, 7.3765, 7.3054,\n 7.2348, 7.3508, 7.2807, 7.3960, 7.5106, 7.4409, 7.3717, 7.4855,\n 7.5988, 7.5299, 7.4616, 7.3937, 7.3263, 7.4386, 7.5504, 7.4833,\n 7.4167, 7.5277, 7.4615, 7.5719, 7.6816, 7.7908, 7.7249, 7.8335,\n 7.9415, 7.8759, 7.8107, 7.9181, 7.8533, 7.9601, 8.0663, 8.0018,\n 7.9377, 8.0433, 8.1485, 8.0847, 8.0212, 7.9582, 7.8956, 8.0000,\n 8.1039, 8.0416, 7.9796, 8.0829, 8.0212, 8.1240, 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Tempe Hometown Premiere will have all the trappings of a mega-Hollywood event including red carpet arrivals, klieg lights, screaming fans, paparazzi and media coverage of the film`s stars headed by Wolverine himself, Hugh Jackman and including Liev Schreiber, Ryan Reynolds, Taylor Kitsch, Lynn Collins, will.i.am and director Gavin Hood. Following a hard-fought contest stretching over thousands of cities and towns across the United States, Tempe emerged triumphant in a far-reaching, citywide bid to nab the gala event. Mayor Hugh Hallman himself rallied the community, which stepped up with dozens of videos that were posted on YouTube.\nHypothesis: Hugh Jackman plays the role of Wolverine in the movie.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.8926, 1.0773, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.9488, 0.8889, 0.8295, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.8575, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 0.9238, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.0439, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, -0.1280, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.1253, -0.1667,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.6099, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.0171, 8.1550, 8.0495, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 10.8012, 10.9123, 11.0227,\n 11.1324, 11.0389, 10.9462, 11.0554, 10.9637, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.5005, 11.6059, 11.7108, 11.6206, 11.7249, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.0476, 12.1492, 12.2503, 12.3508, 12.2628, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.1849,\n 13.2791, 13.3728, 13.4661, 13.3829, 13.3002, 13.3933, 13.3113, 13.4040,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.8613, 13.9515, 13.8707, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.3166, 14.2367, 14.3248, 14.4126, 14.5000,\n 14.4208, 14.3422, 14.4294, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In October, however, amid rising tensions between the government and opposition groups, a car bomb seriously injured an opposition politician and killed his driver, in Beirut.\nHypothesis: A member of the opposition was injured in a car bomb attack in Beirut.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.0949, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, 0.0000,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, 0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.6868, -0.5477,\n -0.5915, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.4949, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.7%", + "z-score": "14.2", + "p value": "4.81e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.6156, 9.5021, 9.3901, 9.5191, 9.6470, 9.7738, 9.8995,\n 9.7897, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.2061, 10.3257, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.7066, 10.8215, 10.9355, 11.0488, 10.9488, 10.8498, 10.9626,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.4574, 11.5645, 11.6709, 11.7766, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 12.0096, 11.9187, 11.8287, 11.9319,\n 11.8427, 11.9455, 11.8571, 11.9594, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.5976,\n 12.6949, 12.6103, 12.7073, 12.6234, 12.7199, 12.8160, 12.9116, 13.0067,\n 13.1014, 13.0185, 13.1129, 13.2068, 13.3002, 13.2182, 13.3113, 13.4040,\n 13.3227, 13.4150, 13.3343, 13.2542, 13.3463, 13.2668, 13.3585, 13.2796,\n 13.3710, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.7559, 13.8447, 13.9332, 14.0214, 14.1091, 14.1966])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Weinstock painstakingly reviewed dozens of studies for evidence of any link between sunscreen use and either an increase or decrease in melanoma.\nHypothesis: skin cancer numbers increase.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.4327, -0.4747, -0.5164, -0.5579, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 5.3333, 5.6045, 5.2463, 4.9193, 5.1962, 5.4611, 5.1711,\n 5.4306, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855, 5.4271, 5.2085, 5.4444,\n 5.6737, 5.4678, 5.2705, 5.0811, 5.3072, 5.1257, 5.3468, 5.1723, 5.3886,\n 5.2204, 5.0576, 4.8999, 4.7469, 4.5985, 4.8107, 4.6664, 4.5260, 4.7336,\n 4.5968, 4.4634, 4.3333, 4.2064, 4.0825, 3.9614, 3.8431, 3.7273, 3.6141,\n 3.8146, 3.7033, 3.5942, 3.4873, 3.3824, 3.2796, 3.1787, 3.3729, 3.5642,\n 3.4641, 3.6522, 3.8376, 4.0205, 3.9208, 4.1008, 4.0024, 3.9056, 3.8103,\n 3.9869, 3.8927, 3.8000, 3.9736, 3.8819, 3.7916, 3.9624, 3.8730, 3.7849,\n 3.9530, 4.1192, 4.0316, 4.1957, 4.3580, 4.5186, 4.6775, 4.8347, 4.9904,\n 5.1444, 5.2970, 5.4480, 5.3594, 5.2719, 5.4212, 5.5690, 5.4822, 5.6285,\n 5.5426, 5.4576, 5.3736, 5.5181, 5.6614, 5.8034, 5.7199, 5.8605, 6.0000,\n 5.9171, 5.8351, 5.7540, 5.8919, 5.8114, 5.9481, 5.8684, 6.0038, 5.9247,\n 5.8464, 5.7689, 5.6921, 5.6160, 5.7498, 5.6743, 5.5995, 5.7320, 5.6578,\n 5.5842, 5.5114, 5.4391, 5.3675, 5.2965, 5.2262, 5.1564, 5.0873, 5.2175,\n 5.1488, 5.0806, 5.0130, 4.9460, 4.8795, 4.8135, 4.9419, 5.0694, 5.0037,\n 5.1303, 5.2560, 5.3810, 5.3153, 5.4393, 5.3740, 5.3092, 5.2449, 5.3677,\n 5.3038, 5.4257, 5.5470, 5.6675, 5.7874, 5.7234, 5.8424, 5.9607, 6.0784,\n 6.1954, 6.3117, 6.4274, 6.5424, 6.6568, 6.7706, 6.7061, 6.6421, 6.7551,\n 6.8675, 6.8037, 6.9155, 6.8520, 6.7890, 6.7264, 6.8373, 6.9477, 7.0574,\n 6.9950, 7.1041, 7.2127, 7.1506, 7.0888, 7.0273, 7.1352, 7.0741, 7.1813,\n 7.1205, 7.2272, 7.1667, 7.1065, 7.0467, 6.9873, 6.9282, 7.0340, 6.9752,\n 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Most people are familiar with the idea of St. Bernards or other dogs taking part in rescue and recovery efforts. Robots might also take part in search and rescue missions.\nHypothesis: Robots are used to find missing victims.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "17.5%", + "z-score": "-2.19", + "p value": "0.986", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.1796, -2.2188, -2.2578, -2.2966, -2.1520, -2.1909])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 4.6188, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 5.8635,\n 6.0413, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 8.8007, 8.9324, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.4327, 9.5556, 9.4563, 9.5784, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.7678, 9.8858, 9.7912, 9.9085,\n 9.8150, 9.9315, 10.0472, 10.1621, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.2493, 11.1621,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.6487, 11.5655, 11.6666, 11.7672, 11.8673,\n 11.9669, 12.0660, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.4065, 12.5024, 12.5979, 12.6930, 12.6130, 12.7077,\n 12.8019, 12.7226, 12.6439, 12.5657, 12.4880, 12.4109, 12.5049, 12.4283,\n 12.3523, 12.2767, 12.3705, 12.2954, 12.2209, 12.1468, 12.0731, 12.1667,\n 12.0935, 12.1867, 12.1141, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Illinois born Charlton Heston was a 27-year-old actor from Broadway and television when he arrived in Hollywood for a five-picture contract with Hal Wallis.\nHypothesis: Charlton Heston was born in Illinois.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.7566, -0.6030, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.0467, 0.0930, 0.2319, 0.1849, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.4885, 0.4428, 0.5740, 0.7044, 0.6584, 0.7878, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.9186, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.4686, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.3033, 7.1909, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.9839, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.6186, 9.7312, 9.8430, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.2253, 10.1429, 10.0611, 10.1692, 10.2766, 10.1955, 10.3024,\n 10.4087, 10.3284, 10.4341, 10.3544, 10.4596, 10.3805, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.7671, 10.6904,\n 10.7920, 10.7159, 10.8170, 10.7415, 10.6665, 10.7671, 10.8673, 10.7928,\n 10.8925, 10.8186, 10.7451, 10.6722, 10.7714, 10.6990, 10.7978, 10.7258,\n 10.8241, 10.7527, 10.8505, 10.9480, 11.0450, 11.1415, 11.2376, 11.1667,\n 11.2624, 11.1919, 11.1218, 11.0521, 10.9829, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Ruth's 1927 single season record of 60 home runs stood unsurpassed until Roger Maris hit 61 in 1961.\nHypothesis: Babe Ruth hit 60 home runs in his lifetime.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.6885, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 5.9186, 6.1107, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.6000, 7.4878,\n 7.3773, 7.2684, 7.1611, 7.3073, 7.4521, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 7.9138, 7.8150, 7.9495, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.5437, 8.4526, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.2588, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.7610, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.2554, 9.3686, 9.2867, 9.2055, 9.3181, 9.2376,\n 9.1577, 9.0786, 9.1905, 9.3017, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.5381, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.7072, 9.6322, 9.7380, 9.8433, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.7886, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.1855, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.3439, 10.2743, 10.3730, 10.4713, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.7222, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The German technology was employed to build Shanghai's existing maglev line, the first in the world to be used commercially.\nHypothesis: Maglev is commercially used.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 1.0596, 0.9869, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.4403, 1.3697, 1.5475, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.3856, 1.3213, 1.2577, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.3954, 1.3333, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.2182, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, 0.0418, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 5.9132, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.0179, 8.9086, 9.0401, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.4065, 11.3091, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.7766, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.3985, 12.3063, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.4286, 12.5289, 12.6287, 12.7279, 12.6387, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.4390, 13.5329, 13.6264, 13.5412,\n 13.6343, 13.7270, 13.8193, 13.9111, 14.0025, 14.0936, 14.1842, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 14.9195, 14.8396, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: SEATTLE \u2014 United Nations gang leader Clay Roueche should spend at least 30 years behind bars after his surprise guilty plea here Tuesday for conspiracy to smuggle cocaine, marijuana and illicit drug profits, U.S. Attorney Jeffrey Sullivan said. Sullivan said outside court that Roueche's leadership of the violent UN gang and the sophistication of the international drug conspiracy will all be factors used to argue for a longer sentence \u2014 up to life \u2014 for the 33-year-old Canadian. And he stressed that the plea agreement reached with Roueche did not include \"any kind of break with respect to sentencing.\".\nHypothesis: Clay Roueche is 33 years old.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "27.5%", + "z-score": "0.76", + "p value": "0.224", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.2449, 0.1952, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.5695, 0.5203, 0.4714, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.8165, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "167", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "70.7%", + "z-score": "13.6", + "p value": "1.4e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.3557, 8.1763, 8.0018, 8.1689, 8.3333,\n 8.1654, 8.0017, 8.1654, 8.0064, 8.1684, 8.3281, 8.1742, 8.0238,\n 8.1825, 8.3391, 8.4936, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.5219, 9.3831, 9.5229, 9.6612, 9.5258,\n 9.3927, 9.5304, 9.4000, 9.5366, 9.6719, 9.5443, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 10.2075, 10.3347, 10.4608, 10.5859,\n 10.7098, 10.8327, 10.7125, 10.5940, 10.7164, 10.8379, 10.7215, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 11.7881, 11.6829, 11.7932, 11.9029, 11.7992, 11.6966, 11.8058,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.7755, 12.6800, 12.7812, 12.8819, 12.7875, 12.6939, 12.7943, 12.7017,\n 12.8017, 12.9011, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.7054, 13.6188, 13.7122, 13.6264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Baker's voice will replace the earlier computer-synthesized voice that was previously used. BT said that his voice was chosen as it was instantly recognisable. It took him 11 days to record 11,593 phrases and sounds which could then be broken down and reassembled by a computer to make new words. It then took five months to process these recordings to make a workable service. BT have said that there will be no barriers as to what Tom Baker's voice can 'say', including rude words. \"What appeals to me most is the thought that I will be bringing good news to people whether it is a cheeky message, a birthday greeting, or just a quick hello,\" said Baker.\nHypothesis: Tom Baker works for BT.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "133", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "19.5%", + "z-score": "-1.45", + "p value": "0.927", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.0498, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.5626, 5.7735, 5.9797, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.1101, 9.0067, 8.9045, 9.0323, 9.1590, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.3140, 9.4346, 9.5543, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.1494, 10.2592, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.1556, 11.0728, 10.9906, 10.9091, 11.0125, 10.9317, 10.8515, 10.9545,\n 11.0569, 10.9773, 11.0793, 11.1807, 11.2816, 11.2028, 11.3032, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.2698, 11.3688, 11.2924, 11.2164, 11.1410,\n 11.2396, 11.1648, 11.0904, 11.0165, 11.1148, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.6179, 11.5470, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Norwegian Nobel Committee is responsible for the selection of the candidates and the choice of Prize Winners for the Peace Prize. The Committee is composed of five members appointed by the Storting (Norwegian parliament). The Peace Prize is awarded in Oslo, Norway and not in Stockholm, Sweden like the other Nobel Prizes.\nHypothesis: Nobel Peace Prize candidates have been chosen.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "204", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "31.9%", + "z-score": "2.26", + "p value": "0.0118", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 0.7006, 0.9258,\n 0.8412, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.9456, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.4003, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.2501, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.3483, 1.2831, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.5556, 1.7143, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.6547, 1.5945, 1.7465, 1.8974,\n 1.8370, 1.7772, 1.7179, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.6710, 1.6160, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.8446, 1.9799, 2.1143, 2.0605,\n 2.0071, 1.9540, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 2.0083,\n 2.1386, 2.0866, 2.0350, 1.9837, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.9419, 1.8935, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.9473, 1.8999, 2.0212, 1.9738, 2.0943, 2.2141, 2.3333,\n 2.2857, 2.2384, 2.1913, 2.3094, 2.2624, 2.2156, 2.1691, 2.1229,\n 2.2398, 2.3561, 2.3098, 2.2637])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.4610, 4.3083, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.3147, 7.1945, 7.0763,\n 6.9601, 6.8458, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.7447, 9.8634, 9.9813, 10.0984, 10.0029, 9.9085,\n 9.8150, 9.7224, 9.8389, 9.9547, 10.0698, 9.9783, 9.8877, 10.0021,\n 9.9124, 10.0261, 9.9373, 9.8494, 9.9625, 9.8753, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.8431, 9.9524, 10.0611, 10.1692, 10.0881, 10.0076, 10.1151,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.5109, 10.6145, 10.7175, 10.8200, 10.9220, 10.8443, 10.9458, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.7604, 11.8571, 11.9534, 12.0493, 11.9730, 11.8973, 11.8221, 11.7473,\n 11.8429, 11.9380, 12.0327, 11.9586, 11.8849, 11.9792, 11.9060, 12.0000,\n 11.9273, 11.8551, 11.9487, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: More than 150 dolphins, marine turtles and beaked whales have been washed up dead on beaches in Africa.\nHypothesis: Dead dolphins, turtles and whales have been found on African beaches.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.5143, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, 0.0000, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, -0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.0420, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.1342, 0.0667, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.2357,\n -0.0586, 0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.0525, -0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.9901,\n 1.1345, 1.0812, 1.2243, 1.3663, 1.5073, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.8446, 1.9799, 2.1143, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.4037, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.2680, 2.2159, 2.1640, 2.1125, 2.2406, 2.3679, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.6414, 2.7654, 2.8887, 2.8368, 2.9593,\n 3.0811, 3.0292, 2.9776, 3.0984, 3.0469, 3.1669, 3.2863, 3.4050,\n 3.3534, 3.3020, 3.4198, 3.5370, 3.6537, 3.7697, 3.7180, 3.8333,\n 3.7818, 3.8964, 4.0105, 3.9590, 4.0723, 4.0210, 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Endon was respected for her humble, unassuming personality. She was popularly referred to as \"Kak Endon,\" or \"Elder Sister Endon.\" She discovered she had cancer in 2002 after her twin sister, Noraini, was earlier diagnosed with the disease. Noraini died in 2003. Endon had a breast removed and traveled to the United States regularly for treatment. She returned to Malaysia from Los Angeles on Oct. 1 after five rounds of chemotherapy since July. Endon had been deeply involved in awareness programs for breast cancer, the main cause of illness-related fatalities for Malaysian women. She has said she refused to feel sorry for herself, and felt fortunate she could obtain the best medical treatment.\nHypothesis: Noraini had a twin sister called Endon.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.4907, 1.4045, 1.6131, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.6547, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.0856, 0.2134, 0.1703,\n 0.2971, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.4145, 0.5375, 0.6598, 0.6170, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "146", + "Fraction of T in Greenlist": "73.4%", + "z-score": "15.8", + "p value": "3.07e-56", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 6.1107, 5.9588, 6.1477, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.0896, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.5866, 11.6966, 11.5950,\n 11.7045, 11.8132, 11.9213, 11.8212, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.3655, 12.4689, 12.5717, 12.6739,\n 12.5782, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.0922, 13.1905, 13.2882, 13.3854, 13.2936, 13.2025, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.7772, 13.8713, 13.7818, 13.8756,\n 13.9690, 14.0619, 13.9735, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.4355, 14.5257, 14.4394, 14.5293, 14.6188, 14.7079, 14.6225,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.0689, 15.1553, 15.2414, 15.3272, 15.2446, 15.1625, 15.2481, 15.3333,\n 15.4182, 15.5028, 15.5870, 15.6709, 15.7545, 15.6736, 15.7570])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The plaintiffs in this most recent suit contend that Lee, Perlmutter, Arthur Lieberman and Avi Arad conspired in bad faith to conceal and misappropriate financial interests in Lee's creations assigned to Stan Lee Media in 1998. SLM's meltdown involved its former President Peter F. Paul fleeing to Brazil, contributions made to Bill and Hillary Clinton, Paul's extradition and more. In 2007, SLM filed a $5 billion lawsuit in which it claimed co-ownership of all of Stan Lee's creations for Marvel.\nHypothesis: Hillary Clinton is Bill's sister.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.2443, -1.2910, -1.1316, -1.1783, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -1.1825, -1.2257, -1.2686, -1.1239,\n -1.1669, -1.0235, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.0820, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.3859, -1.4241, -1.4621, -1.3333,\n -1.3714, -1.2435, -1.2817, -1.3197, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "47.7%", + "z-score": "7.41", + "p value": "6.42e-14", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.6790, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.6571, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.6981, 4.8667, 5.0332, 5.1978, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.2281, 5.3867, 5.5435, 5.4482, 5.3541, 5.2614, 5.1698,\n 5.3243, 5.2338, 5.1444, 5.0562, 5.2086, 5.1212, 5.0350, 5.1855,\n 5.3345, 5.2489, 5.3964, 5.5426, 5.4576, 5.3736, 5.2906, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.5556, 5.6959, 5.6149, 5.5348, 5.4554,\n 5.3769, 5.5155, 5.4377, 5.3606, 5.2842, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.5407, 5.4661, 5.5995, 5.5255, 5.4521, 5.5842, 5.7155,\n 5.8458, 5.7726, 5.7001, 5.6282, 5.7572, 5.8853, 6.0125, 5.9409,\n 5.8698, 5.9960, 6.1213, 6.2458, 6.3694, 6.2985, 6.4213, 6.3509,\n 6.4728, 6.4028, 6.3333, 6.2644, 6.1961, 6.1283, 6.2489, 6.1815,\n 6.1146, 6.0481, 6.1677, 6.2866, 6.2205, 6.1548, 6.2728, 6.2075,\n 6.1427, 6.0784, 6.1954, 6.3117, 6.4274, 6.3632, 6.2994, 6.4143,\n 6.3509, 6.2879, 6.4019, 6.3392, 6.4526, 6.3902, 6.3283, 6.4409,\n 6.3793, 6.3180, 6.4298, 6.5410, 6.6517, 6.5906, 6.7006, 6.6398,\n 6.5794, 6.6887, 6.7974, 6.9056, 7.0133, 6.9530, 7.0601, 7.1667,\n 7.2728, 7.3783, 7.3180, 7.2581, 7.3631, 7.3034, 7.4078])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Alex Da Silva, a salsa dance instructor and choreographer for Fox's \"So You Think You Can Dance,\" was arrested Saturday on suspicion of sexually assaulting four of his students, according to a statement from the Los Angeles Police Department. Da Silva, 41, is accused of assaulting the students in his two homes in the San Fernando Valley over the last six years, police said. He is being held in lieu of $3.8 million bail. Authorities said the four victims were all students of Da Silva at the time of the assaults.\nHypothesis: Alex Da Silva is a student of salsa dance.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "88", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.246", + "p value": "0.403", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.0179, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 9.8198,\n 9.9392, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.4448, 10.3496, 10.4636, 10.5769, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.6827, 11.7851, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.7498, 11.8503, 11.9504, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.7532, 12.8464, 12.9391,\n 12.8616, 12.9540, 12.8771, 12.9691, 13.0608, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.3473, 13.4371, 13.3615, 13.2864, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Ruth bounced back from his 1925 ailments and problems to lead the American League in home runs with 47, 28 home runs more than anyone else.\nHypothesis: Ruth hit 47 runs in his lifetime.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.32", + "p value": "0.0935", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 1.0426, 1.2421, 1.1648, 1.3608,\n 1.2839, 1.4765, 1.4000, 1.3245, 1.5133, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.4446, 1.6164, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.8850, 0.8296, 0.9812, 0.9258, 0.8709, 1.0206,\n 1.1692, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.4662, 1.4142, 1.5505, 1.6859,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.7454, 1.6941, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.6230, 1.5731, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.4087, 1.3607, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.6087, 1.7310, 1.6843, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.3644, 1.3197])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 5.1711, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.4370, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.5556, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.3630, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 11.0756, 10.9898, 10.9048, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.4009, 11.3196,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.1076, 12.0302, 11.9534, 12.0493, 12.1447, 12.2397, 12.1635, 12.2581,\n 12.3523, 12.2767, 12.2016, 12.2954, 12.3888, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.9574, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: On Tuesday, October 7, 2003, Arnold Schwarzenegger was elected Governor of California.\nHypothesis: Arnold Schwarzenegger was elected Governor of California.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.4815, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.3333, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.5410, -0.5843, -0.6274, -0.6702, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.3618, 4.5569, 4.7488, 4.9377,\n 4.8177, 4.7002, 4.8857, 5.0684, 4.9528, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.8305, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.5811, 5.4832, 5.6401, 5.7955, 5.9491, 5.8522, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.1664, 6.0740, 5.9827, 6.1283,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.2883, 6.4283, 6.3408,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.3595, 6.2755, 6.4116, 6.3283,\n 6.4632, 6.5970, 6.7298, 6.8615, 6.7788, 6.6968, 6.6157, 6.7462,\n 6.8757, 7.0043, 6.9237, 6.8439, 6.7648, 6.8922, 6.8138, 6.7361,\n 6.8624, 6.7854, 6.7090, 6.8343, 6.7585, 6.6833, 6.6088, 6.7330,\n 6.6591, 6.7823, 6.7089, 6.8313, 6.7584, 6.6861, 6.8075, 6.9282,\n 6.8563, 6.9762, 7.0952, 7.2136, 7.3312, 7.4482, 7.5644, 7.6800,\n 7.7949, 7.9091, 7.8372, 7.9507, 7.8793, 7.8084, 7.9211, 7.8507,\n 7.9628, 7.8928, 8.0042, 8.1150, 8.0455, 8.1556, 8.0865, 8.0178,\n 8.1273, 8.0591, 8.1679, 8.1001, 8.2084, 8.3161, 8.2486, 8.3557,\n 8.4623, 8.3952, 8.5012, 8.4345, 8.5399, 8.6448, 8.7492, 8.8531,\n 8.9565, 8.8900, 8.9929, 8.9268, 9.0292, 9.1310, 9.0653, 9.0000,\n 9.1013, 9.2022, 9.3026, 9.4026, 9.3375, 9.2729, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Patricia Amy Messier and Eugene W. Weaver were married May 28 at St. Clare Roman Catholic Church in North Palm Beach.\nHypothesis: Eugene W. Weaver is the husband of Patricia Amy.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.0370, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.4410, 1.3862,\n 1.3318, 1.2778, 1.2243, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.8909,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.9981, 1.1251, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 0.9558, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.5228, 3.4101, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 3.0817, 2.9814, 2.8830, 3.0796, 2.9823, 2.8868,\n 2.7928, 2.9848, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.5382,\n 3.7166, 3.8927, 4.0667, 4.2385, 4.4083, 4.3146, 4.2222, 4.1312,\n 4.0415, 3.9530, 3.8657, 4.0316, 3.9452, 3.8600, 3.7758, 3.6927,\n 3.6107, 3.5298, 3.6919, 3.6116, 3.5322, 3.6920, 3.6133, 3.5355,\n 3.4586, 3.6159, 3.7717, 3.6950, 3.6193, 3.7730, 3.9253, 4.0762,\n 4.2258, 4.3740, 4.5210, 4.6667, 4.8111, 4.7341, 4.6580, 4.5826,\n 4.5079, 4.4341, 4.3609, 4.5029, 4.4302, 4.3583, 4.2870, 4.2164,\n 4.1464, 4.0771, 4.2167, 4.1478, 4.0795, 4.2176, 4.1498, 4.0825,\n 4.0158, 4.1522, 4.2877, 4.2212, 4.1552, 4.2893, 4.4224, 4.5547,\n 4.6860, 4.8164, 4.9460, 5.0747, 5.2025, 5.1357, 5.0694, 5.0037,\n 4.9385, 4.8737, 4.8095, 4.9356, 4.8717, 4.8083, 4.7454, 4.6829,\n 4.6209, 4.5594, 4.6838, 4.6225, 4.5617, 4.6850, 4.6245, 4.5644,\n 4.5047, 4.6268, 4.7483, 4.6887, 4.6295, 4.7500, 4.8698, 4.9889,\n 5.1073, 5.2251, 5.3423, 5.4588, 5.5747, 5.5149, 5.4554, 5.3964,\n 5.3377, 5.2795, 5.2215, 5.3361, 5.2784, 5.2211, 5.1642, 5.1075,\n 5.0513, 4.9953, 5.1086, 5.0529, 4.9975, 5.1100, 5.0548, 5.0000,\n 4.9455, 5.0571, 5.1681, 5.1137, 5.0595, 5.1698, 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: On April 28 1945, at the Piazzale Loreto in Milan, Benito Mussolini and his mistress Clara Petacci were shot and hanged in a spectacle that was photographed repeatedly.\nHypothesis: Mussolini's mistress was Clara Petacci.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 1.1446, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.4140, 0.3426, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.6586, 0.8337, 1.0070, 1.1785,\n 1.3483, 1.5164, 1.4506, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.9261, 2.0739, 2.0140, 2.1602, 2.1005, 2.0412,\n 1.9825, 1.9242, 1.8664, 1.8091, 1.7522, 1.8953, 1.8385, 1.9803,\n 2.1210, 2.2608, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 2.1685, 2.1143, 2.0605,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.3500, 2.2966, 2.4267, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.3443, 2.4721, 2.5990, 2.7253, 2.6726,\n 2.6203, 2.5683, 2.6932, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.3368, 2.4597, 2.4099, 2.5319, 2.6534, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.3567, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "47.7%", + "z-score": "7.32", + "p value": "1.26e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 3.4641,\n 3.2206, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.8431, 3.7273, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.5461, 4.4371, 4.6188,\n 4.5115, 4.6904, 4.5847, 4.4809, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.4296, 4.3333, 4.5034, 4.4083, 4.5760, 4.4820, 4.6476,\n 4.5547, 4.4630, 4.6262, 4.7875, 4.6967, 4.6070, 4.7662, 4.9237,\n 5.0795, 4.9904, 5.1444, 5.2970, 5.4480, 5.3594, 5.2719, 5.1855,\n 5.3345, 5.2489, 5.3964, 5.5426, 5.6874, 5.6023, 5.7457, 5.8878,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.3595, 6.2755, 6.1924, 6.1101,\n 6.0287, 5.9481, 6.0837, 6.0038, 5.9247, 5.8464, 5.7689, 5.6921,\n 5.6160, 5.7498, 5.6743, 5.5995, 5.7320, 5.6578, 5.5842, 5.7155,\n 5.8458, 5.9752, 6.1036, 6.2312, 6.1577, 6.0848, 6.2113, 6.1389,\n 6.2644, 6.1926, 6.1213, 6.0506, 5.9805, 6.1047, 6.2282, 6.3509,\n 6.4728, 6.4028, 6.5238, 6.4543, 6.3853, 6.5054, 6.6248, 6.7434,\n 6.6747, 6.6064, 6.7242, 6.8413, 6.9577, 6.8897, 6.8222, 6.7552,\n 6.8707, 6.8041, 6.7380, 6.8527, 6.9667, 6.9009, 6.8355, 6.7706,\n 6.7061, 6.6421, 6.7551, 6.6914, 6.6282, 6.5653, 6.6775, 6.6150,\n 6.5528, 6.6642, 6.7751, 6.7132, 6.6517, 6.7618, 6.8713, 6.9803,\n 6.9190, 7.0273, 7.1352, 7.2425, 7.1813, 7.1205, 7.0601, 7.1667,\n 7.1065, 7.2125, 7.3180])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Twelve of Jupiter's moons are relatively small and seem to have been more likely captured than to have been formed in orbit around Jupiter.\nHypothesis: Jupiter has Twelve moons.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "107", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "28.0%", + "z-score": "0.726", + "p value": "0.234", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, 0.1217, 0.0605, 0.0000, 0.1796, 0.3573, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.7256])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.6353, 1.5323, 1.7685, 1.6667,\n 1.8970, 1.7963, 1.6977, 1.9215, 2.1412, 2.0428, 2.2576, 2.4689,\n 2.3706, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823, 2.8868,\n 3.0793, 3.2691, 3.1741, 3.3607, 3.2667, 3.4503, 3.3574, 3.5382,\n 3.4463, 3.6242, 3.8000, 3.7087, 3.8819, 4.0531, 3.9624, 3.8730,\n 3.7849, 3.9530, 4.1192, 4.2836, 4.1957, 4.1090, 4.2710, 4.4313,\n 4.5899, 4.5035, 4.6603, 4.5747, 4.7296, 4.6448, 4.7980, 4.9497,\n 4.8655, 5.0156, 5.1643, 5.0807, 4.9980, 4.9163, 5.0630, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.5556, 5.6959, 5.6149, 5.5348, 5.6737,\n 5.8114, 5.7318, 5.8684, 5.7894, 5.9247, 6.0590, 6.1923, 6.1137,\n 6.2459, 6.1680, 6.2990, 6.2217, 6.3517, 6.2750, 6.4039, 6.5320,\n 6.4558, 6.5828, 6.7090, 6.6332, 6.5582, 6.4838, 6.6088, 6.7330,\n 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.2857, 7.4034, 7.5204, 7.4482, 7.5644, 7.6800,\n 7.6082, 7.5369, 7.4662, 7.5809, 7.6950, 7.8084, 7.7380, 7.6681,\n 7.5988, 7.5299, 7.6424, 7.5740, 7.6859, 7.6179, 7.7291, 7.6615,\n 7.7720, 7.8820, 7.8147, 7.9241, 8.0328, 7.9659, 7.8995, 7.8335,\n 7.9415, 8.0490, 8.1560, 8.2624, 8.3683, 8.4736, 8.4078, 8.3423,\n 8.4471, 8.3820, 8.3173, 8.4215, 8.5252, 8.6284, 8.7311, 8.6667,\n 8.6026, 8.7048, 8.8065, 8.7427, 8.8439, 8.9446, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Zapatero visited the following cities in four days: Brasilia, S\u00e3o Paulo, Buenos Aires and Santiago de Chile. According to official sources these visits are the last part of the project he began at the EU-Latin American Summit in Guadalajara, Mexico and pursued in the Ibero-American meeting in Costa Rica in November.\nHypothesis: Zapatero participated in the Ibero-American meeting in Costa Rica.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "8.0%", + "z-score": "-5.53", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.4959, -2.5538, -2.6107, -2.6667,\n -2.7218, -2.7761, -2.8296, -2.8823, -2.9343, -2.9856, -3.0361, -3.0861,\n -3.1353, -3.1840, -3.2321, -3.2796, -3.3265, -3.3729, -3.4187, -3.4641,\n -3.5090, -3.5533, -3.5973, -3.6407, -3.6838, -3.7264, -3.7685, -3.8103,\n -3.8517, -3.8927, -3.9333, -3.9736, -4.0135, -4.0531, -4.0923, -4.1312,\n -4.1698, -4.2080, -4.2460, -4.2836, -4.3209, -4.3580, -4.3948, -4.4313,\n -4.4675, -4.5035, -4.5392, -4.5747, -4.6099, -4.6448, -4.6796, -4.7140,\n -4.7483, -4.7823, -4.8161, -4.8497, -4.8831, -4.6876, -4.7217, -4.7556,\n -4.7892, -4.8226, -4.8559, -4.8889, -4.7005, -4.7341, -4.7676, -4.8008,\n -4.8338, -4.8666, -4.8993, -4.9317, -4.9640, -4.7834, -4.8162, -4.8488,\n -4.8812, -4.9135, -4.9455, -4.9774, -5.0091, -5.0406, -4.8670, -4.8990,\n -4.9308, -4.9624, -4.9939, -5.0252, -5.0563, -5.0873, -5.1181, -4.9507,\n -4.9820, -5.0130, -5.0439, -5.0747, -5.1053, -5.1357, -5.1660, -5.1962,\n -5.0344, -5.0649, -5.0952, -5.1255, -5.1555, -5.1854, -5.2152, -5.2449,\n -5.2744, -5.1177, -5.1475, -5.1772, -5.2068, -5.2362, -5.2655, -5.2947,\n -5.3237, -5.3526, -5.2005, -5.2297, -5.2588, -5.2877, -5.3165, -5.3452,\n -5.3738, -5.4023, -5.4306, -5.2827, -5.3113, -5.3398, -5.3682, -5.3964,\n -5.4245, -5.4526, -5.4805, -5.5082, -5.3643, -5.3923, -5.4202, -5.4480,\n -5.4757, -5.5033, -5.5308, -5.5582, -5.5855, -5.4451, -5.4726, -5.5000,\n -5.5273, -5.5545, -5.5816, -5.6085, -5.6354, -5.6622, -5.5252])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.8868, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 3.9595, 3.8552, 4.0415,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 4.0024, 4.1797, 4.0825,\n 3.9869, 3.8927, 4.0667, 4.2385, 4.1451, 4.0531, 3.9624, 4.1312,\n 4.0415, 3.9530, 4.1192, 4.2836, 4.4462, 4.3580, 4.5186, 4.6775,\n 4.8347, 4.9904, 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.9497,\n 4.8655, 4.7823, 4.9322, 5.0807, 5.2278, 5.1450, 5.2906, 5.4349,\n 5.5780, 5.4956, 5.4140, 5.3333, 5.4747, 5.6149, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.3517, 6.2716, 6.4040, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.8439, 6.7648, 6.8922, 6.8138, 6.9402,\n 6.8624, 6.7854, 6.7090, 6.6332, 6.7585, 6.6833, 6.6088, 6.7330,\n 6.8564, 6.9789, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.4034, 7.3312, 7.2596, 7.1886, 7.3054,\n 7.2348, 7.3508, 7.4662, 7.5809, 7.5106, 7.4409, 7.3717, 7.3030,\n 7.4168, 7.3485, 7.2807, 7.2134, 7.3263, 7.4386, 7.3717, 7.3051,\n 7.4167, 7.5277, 7.6381, 7.7480, 7.6816, 7.7908, 7.7249, 7.8335,\n 7.7679, 7.7028, 7.6381, 7.5738, 7.6816, 7.6177, 7.7249, 7.8316,\n 7.9377, 7.8740, 7.8107, 7.7478, 7.6853, 7.7907, 7.7285, 7.6667,\n 7.6052, 7.7099, 7.8142, 7.7530, 7.8567, 7.7958, 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Jakarta lies on a low, flat alluvial plain with historically extensive swampy areas; the parts of the city farther inland are slightly higher.\nHypothesis: The parts of Jakarta away from the coast are on slightly higher land.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.8970, 2.1229, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.7321,\n 1.6471, 1.8477, 2.0455, 2.2404, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 2.0397, 1.9612, 1.8838, 2.0656,\n 2.2453, 2.1678, 2.3448, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.9829, 1.9149, 2.0785, 2.2405, 2.1723, 2.1049, 2.2646,\n 2.4228, 2.5796, 2.5117, 2.4444, 2.3779, 2.5322, 2.4660, 2.4004,\n 2.5527, 2.7037, 2.8534, 2.7875, 2.7222, 2.6575, 2.5934, 2.7406,\n 2.6768, 2.6135, 2.5508, 2.4887, 2.6336, 2.5717, 2.5103, 2.4495,\n 2.5925, 2.5318, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348, 2.3764,\n 2.5156, 2.4574, 2.3995, 2.5373, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.4453, 2.3891, 2.3333, 2.4678, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.0868, 2.2188, 2.1656, 2.1128, 2.2436, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.7780, 1.8999, 1.8527, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.9068, 1.8605, 1.8145, 1.7688, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.9115, 7.8074, 7.9455, 8.0822, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.6359, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.3408, 9.2480, 9.1561, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.6306, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.9091, 11.0125, 11.1154, 11.2178, 11.1370,\n 11.0569, 11.1588, 11.2602, 11.3610, 11.4614, 11.3820, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.9340, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.2954, 12.3888, 12.3143, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Money raised from the sale will go into a trust for Hepburn's family.\nHypothesis: Proceeds go to Hepburn's family.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.4606, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.5023, 0.6667, 0.8295, 0.9909, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.2319, 0.3698, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.4233, 0.5489, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165, 1.3472,\n 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.2206, 3.5382,\n 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998, 3.1177, 2.9439, 3.2222,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.5627, 2.4351, 2.3113, 2.5560, 2.7952, 3.0290, 2.9055, 2.7852, 3.0123,\n 3.2348, 3.1160, 3.0000, 3.2167, 3.4293, 3.3147, 3.5228, 3.4101, 3.6141,\n 3.8146, 3.7033, 3.5942, 3.4873, 3.6831, 3.8759, 3.7700, 3.6662, 3.8552,\n 4.0415, 3.9386, 4.1219, 4.0205, 4.2008, 4.3788, 4.5544, 4.7278, 4.8990,\n 5.0680, 5.2350, 5.1333, 5.0332, 5.1978, 5.3605, 5.2615, 5.1640, 5.3245,\n 5.2281, 5.1332, 5.0395, 4.9472, 5.1051, 5.0138, 4.9237, 4.8347, 4.7469,\n 4.9023, 5.0562, 4.9691, 5.1212, 5.0350, 5.1855, 5.3345, 5.4822, 5.3964,\n 5.3116, 5.4576, 5.6023, 5.5181, 5.4349, 5.5780, 5.7199, 5.6373, 5.7778,\n 5.6959, 5.8351, 5.9732, 6.1101, 6.2459, 6.3807, 6.5144, 6.6471, 6.5653,\n 6.4842, 6.6157, 6.7462, 6.6658, 6.5861, 6.7155, 6.6365, 6.5583, 6.4807,\n 6.4039, 6.5320, 6.4558, 6.3803, 6.3054, 6.2312, 6.3580, 6.4838, 6.4101,\n 6.5350, 6.4618, 6.5857, 6.7089, 6.8313, 6.7584, 6.6861, 6.8075, 6.9282,\n 6.8563, 6.7850, 6.9048, 7.0238, 6.9529, 7.0711, 7.0006, 7.1181, 7.2348,\n 7.3508, 7.4662, 7.5809, 7.6950, 7.8084, 7.7380, 7.6681, 7.7808, 7.8928,\n 7.8233, 7.7544, 7.8657, 7.7971, 7.7291, 7.6615, 7.5944, 7.7048, 7.6381,\n 7.5719, 7.5061, 7.4407, 7.5503, 7.6594, 7.5944, 7.7028, 7.6381, 7.7460,\n 7.8533, 7.9601, 7.8956, 7.8316, 7.9377, 8.0433, 7.9796, 7.9162, 8.0212,\n 8.1258, 8.0627, 8.1667, 8.1039, 8.2074, 8.3103, 8.2479, 8.1858, 8.2882,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: As an active member of the National Guard, he was called to duty in 1941. Although Kennon did not see active combat, he did not return home from World War II until May of 1945.\nHypothesis: Kennon did not participate in WWII.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.3086,\n 0.5353, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.4765, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.2577, 1.4222, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 1.1316, 1.0759, 1.2247,\n 1.3725, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.7454, 1.8773, 1.8257,\n 1.7746, 1.7237, 1.6732, 1.8033, 1.9327, 1.8821, 1.8317, 1.9599,\n 1.9097, 1.8598, 1.9868, 1.9370, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.7025,\n 1.8252, 1.7780, 1.7310, 1.6843, 1.6378, 1.5916, 1.7128, 1.8333,\n 1.9533, 1.9068, 1.8605, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428, 1.2702, 1.1323, 1.4444,\n 1.3093, 1.6082, 1.8974, 2.1776, 2.0412, 1.9096, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.4351, 2.3113, 2.1909, 2.0738, 1.9599, 2.2011, 2.4371, 2.6681,\n 2.5538, 2.4422, 2.3333, 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.5690, 2.4689, 2.3706, 2.2743, 2.4804, 2.3851, 2.5873, 2.4930, 2.6914,\n 2.8868, 3.0793, 3.2691, 3.1741, 3.0806, 2.9887, 3.1743, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.8000, 3.9736, 4.1451, 4.0531, 3.9624, 4.1312, 4.2981,\n 4.2080, 4.1192, 4.2836, 4.1957, 4.1090, 4.0234, 3.9389, 4.1003, 4.2601,\n 4.4182, 4.3339, 4.2507, 4.1684, 4.3241, 4.2426, 4.3966, 4.3158, 4.2359,\n 4.1569, 4.3086, 4.2303, 4.1528, 4.3027, 4.2258, 4.1497, 4.2977, 4.2222,\n 4.3687, 4.5140, 4.6580, 4.5826, 4.5079, 4.6503, 4.7916, 4.7173, 4.6437,\n 4.7834, 4.7104, 4.6380, 4.5663, 4.4953, 4.6332, 4.7700, 4.9058, 4.8348,\n 4.7645, 4.6949, 4.8291, 4.7599, 4.8930, 4.8242, 4.7559, 4.6883, 4.8200,\n 4.7527, 4.6860, 4.8164, 4.7501, 4.6843, 4.8135, 4.7481, 4.8763, 5.0037,\n 5.1303, 5.2560, 5.1905, 5.1255, 5.0609, 5.1854, 5.3092, 5.2449, 5.3677,\n 5.4899, 5.6112, 5.7319, 5.8519, 5.7874, 5.7234, 5.8424, 5.9607, 5.8969,\n 5.8336, 5.9510, 5.8880, 5.8254, 5.7633, 5.7016, 5.8179, 5.9336, 6.0487,\n 5.9871, 5.9258, 5.8650, 5.9792, 5.9186, 6.0321, 5.9718, 5.9120, 5.8525,\n 5.9651, 5.9059, 5.8470, 5.9588, 5.9002, 5.8420, 5.9530, 5.8951, 6.0054,\n 6.1153, 6.2246, 6.3333, 6.2753, 6.2177, 6.1604, 6.2684, 6.3758, 6.3187,\n 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Auburn High School Athletic Hall of Fame recently introduced its Class of 2005 which includes 10 members.\nHypothesis: The Auburn High School Athletic Hall of Fame has ten members.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.4403, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.4142, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.4284, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.6336, 1.7609, 1.7119, 1.8383, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.6737, 1.7974, 1.9206, 1.8728,\n 1.8252, 1.9473, 1.8999, 1.8527, 1.8058, 1.9267, 1.8799, 2.0000,\n 1.9533, 1.9068, 1.8605, 1.8145, 1.7688, 1.8874, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.0335, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.6251, 9.7447, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.0891, 12.0032, 12.1036, 12.2034, 12.1184, 12.0341, 12.1335, 12.0499,\n 12.1489, 12.0660, 11.9837, 12.0824, 12.1805, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.1502, 12.2467, 12.1677, 12.2638, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.8007, 12.8928, 12.9845, 13.0758, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: MEXICO CITY (Reuters) - A deadly strain of swine flu never seen before has broken out in Mexico, killing as many as 60 people and raising fears it is spreading across North America. The World Health Organization said it was concerned about what it called 800 \"influenza-like\" cases in Mexico, and also about a confirmed outbreak of a new strain of swine flu in the United States. It said about 60 people had died in Mexico. Mexico's government said it had confirmed that at least 16 people had died of the swine flu in central Mexico and that there could be another 45 fatal victims.\nHypothesis: 800 Mexicans have been affected by a new form of swine influenza.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.3208, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.3095, 0.2462,\n 0.4284, 0.6086, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.1374, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.0886, -0.1325, -0.1761, -0.0439, 0.0875, 0.2182, 0.1741,\n 0.3038, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.4620, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.0883, 5.9628, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.4138, 8.3138, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.2514, 10.3630, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.7349, 10.8426, 10.9497, 11.0562, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.1646, 12.0824, 12.1805, 12.0990, 12.1967, 12.2940,\n 12.2132, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.6283, 12.7226, 12.6439, 12.5657, 12.6597, 12.7532, 12.8464, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.2288, 13.1520, 13.2429, 13.3333,\n 13.2572, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Doctors who treated Ayrton Senna after he crashed during the San Marino Grand Prix have denied allegations that the Brazilian driver died at the Imola track.\nHypothesis: Ayrton Senna had the accident that caused his death at the San Marino Grand Prix.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "23.2%", + "z-score": "-0.465", + "p value": "0.679", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.4549, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.5608, -0.3907, -0.2222, -0.2765, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "46.4%", + "z-score": "5.53", + "p value": "1.64e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.5671, 1.7963, 2.0211, 1.9215, 1.8240, 2.0428, 1.9462, 1.8516,\n 2.0647, 1.9711, 2.1798, 2.0870, 2.2916, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.7005, 2.6098, 2.8006, 2.9887, 2.8983, 2.8093, 2.9938,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.4427, 3.3566,\n 3.5283, 3.6979, 3.8657, 4.0316, 4.1957, 4.3580, 4.2710, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.9691, 5.1212, 5.0350, 4.9497,\n 4.8655, 4.7823, 4.7001, 4.6188, 4.5384, 4.4590, 4.6079, 4.7556,\n 4.6765, 4.8226, 4.7442, 4.6667, 4.8111, 4.9543, 4.8772, 5.0190,\n 5.1597, 5.0829, 5.0070, 4.9317, 4.8572, 4.9960, 4.9221, 5.0596,\n 5.1962, 5.1225, 5.2578, 5.3921, 5.5255])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Two Turkish engineers and an Afghan translator kidnapped in December were freed Friday.\nHypothesis: translator kidnapped in Iraq\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.0612, 0.1217, 0.0605, 0.2408, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.4288, 0.3736, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, 0.0476, 0.1898, 0.1419, 0.2828, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "50.0%", + "z-score": "2.31", + "p value": "0.0105", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Successful plaintiffs recovered punitive damages in Texas discrimination cases 53% of the time.\nHypothesis: Legal costs to recover punitive damages are a deductible business expense.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.9869, 1.1767, 1.3641, 1.2910,\n 1.4755, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.5511, 1.7150, 1.8773, 1.8116,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.7767, 1.7154, 1.6547, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.9261, 2.0739, 2.0140, 1.9545, 2.1005, 2.2454,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.2361, 2.3764,\n 2.5156, 2.4574, 2.5954, 2.7325, 2.6742, 2.8101, 2.7520, 2.6943,\n 2.6370, 2.5802, 2.7143, 2.8475, 2.7906, 2.7341, 2.6781, 2.8098,\n 2.9406, 2.8845, 2.8288, 2.9584, 2.9029, 2.8478, 2.9761, 2.9212,\n 2.8666, 2.8124, 2.9394, 3.0657, 3.0114, 2.9575, 3.0827, 3.0290,\n 2.9756, 2.9225, 3.0464, 2.9935, 2.9410, 2.8887, 2.8368, 2.9593,\n 2.9076, 2.8561, 2.8050, 2.7541, 2.7036, 2.6534, 2.6034, 2.5538,\n 2.6742, 2.6247, 2.7443, 2.8633, 2.8137, 2.7644, 2.8825, 2.8333,\n 2.7844, 2.7358, 2.6874, 2.8043, 2.9205, 3.0363, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.1550, 8.2916, 8.4270, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.2923, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.5769, 10.6894, 10.8012, 10.9123, 10.8186,\n 10.7257, 10.8363, 10.9462, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.9060, 12.0096, 12.1125, 12.0218, 11.9319,\n 12.0345, 12.1366, 12.0476, 12.1492, 12.2503, 12.1622, 12.2628, 12.3629,\n 12.4625, 12.5615, 12.4746, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.6343, 13.5499, 13.4661, 13.5589, 13.6514, 13.5683, 13.6604, 13.7521,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 13.9515, 13.8707, 13.9606,\n 13.8804, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.4294, 14.5162, 14.6027, 14.5248, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: With the city's hotels in the midst of a sudden slowdown in business, operators are seeking wage cuts and other concessions from the unions representing 27,500 bellhops, housekeepers and waiters. But the unions are drawing a line across hotel lobbies, from the high-end Carlyle to the more budget-minded Ramada Inn, saying they see no need to bend now that operators are cutting rates in an effort to fill vacant rooms with tourists, executives and other travelers. It is far too soon to judge the financial health of the industry, said Peter Ward, president of the New York Hotel and Motel Trades Council, an alliance of hotel unions.\nHypothesis: The New York Hotel and Motel Trades Council is an alliance of hotel unions.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547, 0.9802, 0.8165,\n 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714, 0.3464, 0.6794, 0.5556,\n 0.8729, 0.7505, 0.6325, 0.9333, 0.8165, 0.7035, 0.9901, 1.2687, 1.1547,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.7924, 1.0445, 1.2910,\n 1.1918, 1.0948, 1.0000, 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714,\n 0.3892, 0.3086, 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637,\n 0.2887, 0.5013, 0.4264, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.5846, 0.5164, 0.7057,\n 0.6376, 0.8238, 0.7559, 0.6888, 0.8716, 1.0523, 0.9847, 1.1628, 1.3389,\n 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 1.1785, 1.3483, 1.2831, 1.2185,\n 1.1547, 1.0915, 1.2577, 1.4222, 1.3587, 1.5213, 1.4580, 1.3954, 1.3333,\n 1.2719, 1.4313, 1.3700, 1.3093, 1.2492, 1.1896, 1.3460, 1.5010, 1.4412,\n 1.5945, 1.5348, 1.4757, 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316,\n 1.0759, 1.2247, 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423,\n 1.3862, 1.3318, 1.2778, 1.2243, 1.1711, 1.3128, 1.4535, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.3112, 1.4470,\n 1.3957, 1.3448, 1.4792, 1.4284, 1.3779, 1.5110, 1.4606, 1.5926, 1.5423,\n 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254, 1.3768, 1.3284, 1.2804,\n 1.4087, 1.3607, 1.3131, 1.4402, 1.3926, 1.3453, 1.4713, 1.5967, 1.5492,\n 1.5020, 1.6262, 1.7498, 1.7025, 1.6555, 1.7780, 1.7310, 1.6843, 1.6378,\n 1.7592, 1.7128, 1.6667, 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592,\n 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "15", + "# Tokens in Greenlist": "7", + "Fraction of T in Greenlist": "46.7%", + "z-score": "1.94", + "p value": "0.0263", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The discovery is based on velocity measurements of a whirlpool of hot gas orbiting the black hole.\nHypothesis: Hubble discovers black holes.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.5175, 0.7332, 0.9456, 0.8660,\n 0.7877, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.0441, 1.3112, 1.5717, 1.8257,\n 2.0738, 1.9599, 2.2011, 2.4371, 2.3238, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.9913, 2.8823, 3.0929, 3.2998, 3.1918, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.6715, 4.5760, 4.4820, 4.3894,\n 4.5547, 4.7181, 4.6262, 4.7875, 4.6967, 4.6070, 4.7662, 4.9237,\n 5.0795, 5.2338, 5.1444, 5.2970, 5.2086, 5.3594, 5.5088, 5.6569,\n 5.8035, 5.9488, 6.0927, 6.0044, 6.1470, 6.2883, 6.4283, 6.5672,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.5807, 6.4957, 6.6308, 6.7648,\n 6.8977, 7.0296, 7.1605, 7.0759, 7.2058, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.8808, 8.0042, 8.1266, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.3993, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.2486, 10.3544, 10.2753, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.7959, 10.8984, 11.0004, 11.1018, 11.2028, 11.1245, 11.0468,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.1933, 11.2924, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.8221, 11.9176,\n 12.0127, 11.9380, 12.0327, 11.9586, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.5183, 12.6102, 12.5367, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI.\nHypothesis: Pope John Paul II died.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.3114, 1.4427, 1.5731, 1.7028, 1.8317, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.9370, 1.8875, 1.8383, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.9419, 2.0656, 2.1886, 2.3110, 2.2620, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.3098, 2.2618, 2.2141, 2.3333,\n 2.4520, 2.4042, 2.3567, 2.4744, 2.5915, 2.5439, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 3.7017, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.4634, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.0133, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.0951, 9.2143, 9.3326, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.3085, 10.4164, 10.5238, 10.4407, 10.5475, 10.6537, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 10.9545,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.1018, 11.2028, 11.3032, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 11.8973, 11.9928, 11.9176,\n 12.0127, 12.1073, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.4638, 12.5553, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.53, + "accuracy_with_watermark": 0.49, + "f1_without_watermark": 0.5123975516132379, + "f1_with_watermark": 0.4357782940590773 + } + }, + "validation": { + "results": [ + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.\nHypothesis: Christopher Reeve had an accident.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.6312, 0.8909, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.8944, 1.1088, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 1.0773, 1.0079, 1.1898, 1.1206, 1.2999, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.6854, 1.8559, 1.7865, 1.7178, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.5852,\n 1.7467, 1.9066, 1.8419, 1.7778, 1.7143, 1.8716, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.7154, 1.6547, 1.8071, 1.9582, 1.8974,\n 2.0470, 2.1954, 2.3426, 2.4887, 2.4271, 2.3660, 2.5103, 2.6536,\n 2.5925, 2.5318, 2.4717, 2.6131, 2.5532, 2.6933, 2.8324, 2.9704,\n 2.9103, 3.0471, 3.1831, 3.3181, 3.2577, 3.1977, 3.1382, 3.2717,\n 3.2124, 3.1536, 3.0952, 3.0373, 2.9798, 2.9227, 3.0540, 3.1844,\n 3.3140, 3.2567, 3.1998, 3.1433, 3.2715, 3.2152, 3.3424, 3.2863,\n 3.2306, 3.1753, 3.1203, 3.0657, 3.1912, 3.3160, 3.4401, 3.3853,\n 3.5085, 3.6310, 3.7528, 3.6979, 3.6433, 3.5890, 3.7097, 3.6556,\n 3.6019, 3.5485, 3.4954, 3.4427, 3.3902, 3.5093, 3.6277, 3.7455,\n 3.6929, 3.6407, 3.5887, 3.7055, 3.8216, 3.7697, 3.7180, 3.6667,\n 3.6156, 3.5648, 3.5143, 3.6291, 3.7432, 3.6927, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.4816, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 4.8712, 4.7488, 4.9377,\n 4.8177, 4.7002, 4.8857, 5.0684, 4.9528, 4.8394, 5.0190, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.0943, 5.9932, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.9824, 7.1243, 7.0268, 6.9305, 7.0711,\n 6.9759, 6.8819, 7.0211, 7.1591, 7.0662, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.9839, 7.8948, 7.8065, 7.9336, 7.8463, 7.7598, 7.8859, 8.0111,\n 7.9254, 7.8406, 7.9649, 7.8808, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.5516, 8.6677, 8.7831, 8.7033, 8.6241, 8.7388, 8.6603,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.4752, 9.3979, 9.3212, 9.4299, 9.3537, 9.2782, 9.3863, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.9340, 10.0371, 10.1398, 10.0668, 9.9944, 10.0965,\n 10.0245, 9.9531, 10.0547, 10.1558, 10.0848, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.5410, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Yet, we now are discovering that antibiotics are losing their effectiveness against illness. Disease-causing bacteria are mutating faster than we can come up with new antibiotics to fight the new variations.\nHypothesis: Bacteria is winning the war against antibiotics.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "106", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "34.9%", + "z-score": "2.36", + "p value": "0.00926", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547, 0.9802, 1.3608,\n 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142, 1.2702, 1.5852, 1.8889,\n 2.1822, 2.0370, 1.8974, 1.7628, 2.0412, 1.9096, 1.7823, 1.6590, 1.9245,\n 1.8034, 2.0605, 1.9415, 2.1909, 2.4345, 2.3163, 2.2011, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.0000, 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570,\n 2.2576, 2.1602, 2.3706, 2.5775, 2.4804, 2.3851, 2.5873, 2.4930, 2.4004,\n 2.3094, 2.5064, 2.4163, 2.3276, 2.5205, 2.7107, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.2819, 2.2000, 2.3842, 2.5660, 2.7456, 2.6632, 2.5820, 2.7585,\n 2.6778, 2.5983, 2.5198, 2.4423, 2.3658, 2.5378, 2.4618, 2.3868, 2.3126,\n 2.2393, 2.1669, 2.0954, 2.2629, 2.4286, 2.5927, 2.5207, 2.6828, 2.6112,\n 2.5403, 2.4703, 2.4010, 2.3324, 2.2646, 2.4228, 2.3552])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094, 2.6605, 2.9938,\n 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.2998, 3.1177, 3.3968, 3.2222,\n 3.4915, 3.7524, 4.0056, 3.8367, 3.6742, 3.9196, 4.1586, 4.0012, 3.8490,\n 4.0814, 3.9337, 4.1603, 4.3818, 4.5985, 4.4544, 4.3142, 4.5260, 4.7336,\n 4.5968, 4.4634, 4.6667, 4.5363, 4.7357, 4.9316, 5.1241, 4.9962, 4.8712,\n 5.0602, 5.2463, 5.1236, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009,\n 5.4848, 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386, 6.0943,\n 5.9932, 6.1471, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008, 6.5483, 6.4510,\n 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354, 6.7414, 6.6486, 6.7890,\n 6.9282, 6.8364, 6.7456, 6.8834, 6.7937, 6.9303, 7.0657, 7.2001, 7.1111,\n 7.0231, 7.1563, 7.2884, 7.2012, 7.1149, 7.2459, 7.1605, 7.2904, 7.4193,\n 7.5472, 7.4625, 7.3786, 7.5056, 7.6315, 7.5484, 7.4661, 7.5910, 7.5094,\n 7.6335, 7.7567, 7.8791, 7.7981, 7.7178, 7.8393, 7.9600, 7.8803, 7.8014,\n 7.9212, 7.8429, 7.9619, 8.0801, 8.1976, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.1988, 8.1229, 8.2381, 8.1628, 8.2772, 8.3910, 8.5041, 8.4293, 8.3550,\n 8.4674, 8.5792, 8.5054, 8.4322, 8.5433, 8.4706, 8.5810, 8.6908, 8.8000,\n 8.7278, 8.6560, 8.7646, 8.8726, 8.8013, 8.7305, 8.8379, 8.7676, 8.8744,\n 8.9806, 9.0863, 9.0164, 8.9469, 9.0520, 9.1566, 9.0876, 9.0190, 9.1230,\n 9.0549, 9.1584, 9.2613, 9.3638, 9.2960, 9.2287, 9.3306, 9.4321, 9.3651,\n 9.2986, 9.3995, 9.3333, 9.4338, 9.5338, 9.6334, 9.5675, 9.5021, 9.6011,\n 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Cairo is now home to some 15 million people - a burgeoning population that produces approximately 10,000 tonnes of rubbish per day, putting an enormous strain on public services. In the past 10 years, the government has tried hard to encourage private investment in the refuse sector, but some estimate 4,000 tonnes of waste is left behind every day, festering in the heat as it waits for someone to clear it up. It is often the people in the poorest neighbourhoods that are worst affected. But in some areas they are fighting back. In Shubra, one of the northern districts of the city, the residents have taken to the streets armed with dustpans and brushes to clean up public areas which have been used as public dumps.\nHypothesis: 15 million tonnes of rubbish are produced daily in Cairo.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "13.1%", + "z-score": "-3.89", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -1.7538, -1.8074,\n -1.6038, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.5322, -2.5756, -2.6186,\n -2.6613, -2.7037, -2.5304, -2.5731, -2.6154, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.7369, -2.5717, -2.4079, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.8472, -2.8845, -2.9216, -2.9584, -2.9950, -3.0315, -2.8846, -2.9212,\n -2.9576, -2.9938, -3.0298, -2.8853, -2.9215, -2.9575, -2.9933, -3.0290,\n -3.0644, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.7306, -3.7624, -3.7940, -3.8255, -3.8569, -3.8881])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.2372, 5.4678, 5.2705, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 7.6996, 7.8628, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 8.8082, 8.9544, 9.0990, 8.9618, 8.8271, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.6192, 8.7599, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.6309, 9.5191, 9.4088, 9.5368, 9.6638,\n 9.7897, 9.9146, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.9955, 11.1111, 11.2259, 11.1197, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 11.7992, 11.9083, 12.0167,\n 11.9144, 11.8132, 11.9213, 12.0286, 11.9288, 12.0357, 12.1419, 12.2474,\n 12.3524, 12.2541, 12.3586, 12.2615, 12.1652, 12.2694, 12.3729, 12.4759,\n 12.5782, 12.4834, 12.5853, 12.4915, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 13.0922, 13.1905, 13.2882, 13.3854, 13.4822, 13.5784, 13.6742,\n 13.7694, 13.8642, 13.9585, 13.8675, 13.9615, 14.0550, 14.1481, 14.2408,\n 14.3330, 14.4248, 14.5161, 14.4267, 14.5178, 14.6084, 14.5199, 14.4321,\n 14.5226, 14.6126, 14.7023, 14.7916, 14.8804, 14.9689, 14.8825, 14.7966,\n 14.8849, 14.9729, 15.0605, 15.1477, 15.2345, 15.3210, 15.2364, 15.3226,\n 15.4085, 15.3247, 15.4103, 15.4956, 15.5805, 15.6651, 15.5823, 15.6667,\n 15.5845, 15.5028, 15.5870, 15.6709, 15.7545, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Amish community in Pennsylvania, which numbers about 55,000, lives an agrarian lifestyle, shunning technological advances like electricity and automobiles. And many say their insular lifestyle gives them a sense that they are protected from the violence of American society. But as residents gathered near the school, some wearing traditional garb and arriving in horse-drawn buggies, they said that sense of safety had been shattered. \"If someone snaps and wants to do something stupid, there's no distance that's going to stop them,\" said Jake King, 56, an Amish lantern maker who knew several families whose children had been shot.\nHypothesis: Pennsylvania has the biggest Amish community in the U.S.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "28.0%", + "z-score": "0.775", + "p value": "0.219", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.7746])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.1455, 9.0213, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.5940, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 10.8477, 10.9669, 11.0851, 11.2025, 11.0902, 11.2069, 11.3228,\n 11.2124, 11.3276, 11.2187, 11.1111, 11.2259, 11.3399, 11.4531, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 11.9083, 12.0167,\n 12.1244, 12.0223, 12.1295, 12.2360, 12.3419, 12.4471, 12.5517, 12.6557,\n 12.5557, 12.4567, 12.5604, 12.6635, 12.7660, 12.8679, 12.9692, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.3710, 13.4691, 13.5668, 13.6640,\n 13.7606, 13.6656, 13.5714, 13.6679, 13.7638, 13.8593, 13.9543, 14.0488,\n 14.1428, 14.2364, 14.3295, 14.2373, 14.3301, 14.4225, 14.3313, 14.4234,\n 14.5150, 14.4248, 14.3352, 14.2464, 14.3380, 14.4292, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.8789, 14.9677, 15.0560, 14.9689, 14.8825, 14.9707,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.4062, 15.4922, 15.5778, 15.6631,\n 15.5783, 15.6633, 15.5792, 15.6640, 15.7485, 15.8327, 15.9165, 15.8333,\n 15.9169, 16.0002, 15.9178, 16.0009, 16.0836, 16.1660, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Security forces were on high alert after an election campaign in which more than 1,000 people, including seven election candidates, have been killed.\nHypothesis: Security forces were on high alert after a campaign marred by violence.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "33.3%", + "z-score": "2.43", + "p value": "0.00762", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.7823, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.5068, 1.7285, 1.6348, 1.5430,\n 1.7589, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.8477, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 2.1773,\n 2.3651, 2.2819, 2.4667, 2.3842, 2.3028, 2.4841, 2.6632, 2.8402,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.4423, 2.3658, 2.5378, 2.4618,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.5743, 2.5011, 2.6656, 2.5927,\n 2.5207, 2.6828, 2.8433, 3.0022, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.6481, 2.5796, 2.7349, 2.6667, 2.8203, 2.7524, 2.6852, 2.8368,\n 2.9872, 3.1363, 3.0688, 3.2163, 3.1492, 3.0827, 3.2285, 3.3731,\n 3.5166, 3.4499, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.2025, 3.3420, 3.2788, 3.2161, 3.1539, 3.2918, 3.4286, 3.3665,\n 3.3049, 3.2437, 3.1831, 3.1229, 3.2577, 3.1977, 3.1382, 3.0792,\n 3.0206, 2.9625, 2.9048, 3.0373, 2.9798, 2.9227, 2.8660, 2.8098,\n 2.7539, 2.6984, 2.6433, 2.5886, 2.5343, 2.4803, 2.4267])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.2515, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.0186, 4.8742, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.0684, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.9747, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.4610, 5.3605, 5.2615, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.8139, 5.9641, 5.8707, 6.0193, 5.9270, 5.8358, 5.9827, 5.8926,\n 5.8035, 5.7155, 5.6285, 5.7735, 5.9172, 5.8310, 5.7457, 5.8878,\n 6.0288, 5.9442, 5.8605, 5.7778, 5.6959, 5.6149, 5.7540, 5.8919,\n 6.0287, 5.9481, 6.0837, 6.2183, 6.3517, 6.2716, 6.1923, 6.3246,\n 6.4558, 6.3770, 6.2990, 6.2217, 6.1451, 6.0693, 6.1990, 6.3278,\n 6.4558, 6.3803, 6.5072, 6.6332, 6.7585, 6.6833, 6.6088, 6.7330,\n 6.8564, 6.7823, 6.7089, 6.6361, 6.5639, 6.4923, 6.6144, 6.7358,\n 6.8563, 6.7850, 6.9048, 7.0238, 7.1421, 7.0711, 7.0006, 7.1181,\n 7.2348, 7.1647, 7.0952, 7.0262, 6.9577, 6.8897, 7.0054, 7.1204,\n 7.2348, 7.1670, 7.2807, 7.3937, 7.5061, 7.4386, 7.3717, 7.4833,\n 7.5944, 7.5277, 7.4615, 7.3958, 7.3305, 7.2656, 7.3758, 7.4853,\n 7.5944, 7.5297, 7.6381, 7.7460, 7.8533, 7.7889, 7.7249, 7.8316,\n 7.9377, 7.8740, 7.8107, 7.7478, 7.6853, 7.6231, 7.7285, 7.8333,\n 7.9377, 7.8758, 7.9796, 8.0829, 8.1858, 8.1240, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In 1979, the leaders signed the Egypt-Israel peace treaty on the White House lawn. Both President Begin and Sadat received the Nobel Peace Prize for their work. The two nations have enjoyed peaceful relations to this day.\nHypothesis: The Israel-Egypt Peace Agreement was signed in 1979.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.9456, 1.1547,\n 1.3606, 1.5635, 1.4812, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 1.0141, 0.9567, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.3014, 1.4517, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.2243, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 1.0465, 0.9979, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 1.0690,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.0096, 1.1380, 1.0911, 1.0445,\n 0.9981, 1.1251, 1.0788, 1.0328, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.5105, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 9.1130, 8.9815,\n 8.8522, 8.9935, 9.1333, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.6867, 9.8187, 9.6960, 9.8271, 9.9570, 10.0857, 10.2132, 10.3397,\n 10.4650, 10.3459, 10.4704, 10.5940, 10.4770, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.9727, 10.8616, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.0254, 11.1392, 11.2522, 11.1500, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.5157, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 11.9754, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.1012, 12.2040, 12.3063, 12.4081, 12.3168,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.7279, 12.6387, 12.7376,\n 12.8359, 12.7476, 12.6601, 12.7581, 12.8556, 12.9527, 12.8661, 12.9628,\n 13.0590, 13.1547, 13.0690, 12.9840, 13.0795, 13.1745, 13.2690, 13.1849,\n 13.1014, 13.0185, 13.1129, 13.0307, 13.1246, 13.0431, 12.9621, 12.8817,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.1746, 13.0956, 13.0171, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.0608, 13.1520, 13.2429, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: singer and actress Britney Spears, 24, has filled papers in Los Angeles County Superior Court to divorce her husband Kevin Federline, 28. A spokeswoman for the court, Kathy Roberts stated that the papers cited irreconcilable differences\" as the reason for the divorce and have, according to the courts, been legally separated as of Monday, November 6, the same day that Spears appeared on Late Night with David Letterman.\nHypothesis: Spears is to divorce from Kevin Federline.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.0000, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.8452, 1.0094, 0.9488, 0.8889, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.4412, 1.5945, 1.5348, 1.4757,\n 1.6271, 1.7772, 1.7179, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.9825, 2.1268, 2.0682, 2.0101, 2.1527, 2.2943, 2.2361, 2.1783,\n 2.3183, 2.4574, 2.3995, 2.3422, 2.4797, 2.6163, 2.5589, 2.5019,\n 2.6370, 2.7713, 2.7143, 2.6576, 2.7906, 2.9227, 2.8660, 2.8098,\n 2.9406, 3.0706, 3.0143, 2.9584, 3.0872, 3.2152, 3.1593, 3.1038,\n 3.2306, 3.3567, 3.3012, 3.2460, 3.3710, 3.4953, 3.4401, 3.3853,\n 3.5085, 3.6310, 3.5762, 3.5218, 3.6433, 3.7641, 3.7097, 3.6556,\n 3.7755, 3.8947, 3.8406, 3.7869, 3.9052, 4.0228, 3.9691, 3.9158,\n 4.0325, 4.1487, 4.0953, 4.0423, 4.1576, 4.2723, 4.2193, 4.1667,\n 4.2805, 4.3938, 4.3412, 4.2889, 4.4014, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.1389, 5.9530, 6.1584, 6.3594, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 7.8420, 7.6862, 7.8512, 7.6996, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 8.7419, 8.6192, 8.7599, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.4685, 9.5939, 9.4896, 9.6141, 9.5111,\n 9.4094, 9.5331, 9.4327, 9.3333, 9.4563, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 10.1948, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.6894, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.7635, 10.6733, 10.5841, 10.6936,\n 10.6052, 10.5175, 10.6265, 10.7349, 10.8426, 10.7559, 10.8631, 10.7772,\n 10.6920, 10.6076, 10.5238, 10.4407, 10.3583, 10.2766, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 10.9545,\n 11.0569, 11.1588, 11.2602, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.6597, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.6153, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.1815, 13.2717, 13.3615, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Following the successful bid to bring the 2010 Ryder Cup to Wales, the Wales Tourist Board has wasted little time in commissioning work to ensure that the benefits accruing from the event are felt throughout the country.\nHypothesis: Wales to host 2010 Ryder Cup.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.15", + "p value": "0.875", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.4907, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.1488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 2.9439, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 5.7646, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.2000, 6.0928, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.4501, 6.3502, 6.2517, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.7416, 6.8849, 6.7886, 6.9305, 6.8354,\n 6.9759, 7.1152, 7.0211, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.5967, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.3813, 8.2956, 8.2107, 8.1266, 8.0434, 8.1650,\n 8.2858, 8.2032, 8.1214, 8.0403, 7.9600, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.1585, 8.2760, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.5052, 8.6190, 8.5424, 8.4664, 8.3910, 8.5041, 8.6166,\n 8.5417, 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.0548, 9.1629, 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.4432,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.7095, 9.6394, 9.5698, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.4021, 10.5000,\n 10.4312, 10.3628, 10.2949, 10.2273, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Steve Jobs was attacked by Sculley and other Apple executives for not delivering enough hot new products and resigned from the company a few weeks later.\nHypothesis: Steve Jobs worked for Apple.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "75", + "Fraction of T in Greenlist": "37.7%", + "z-score": "4.13", + "p value": "1.79e-05", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.9795, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.4804, 2.6833, 2.8830, 2.7863, 2.6914, 2.5981,\n 2.5064, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.0135, 1.9333, 1.8543, 1.7765, 1.9612, 1.8838, 2.0656,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.9413, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.1669, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.5207, 2.6828, 2.6112, 2.5403, 2.4703, 2.4010, 2.5600, 2.7175,\n 2.6481, 2.5796, 2.5117, 2.4444, 2.3779, 2.3120, 2.4660, 2.4004,\n 2.5527, 2.7037, 2.6381, 2.5731, 2.5087, 2.6575, 2.8051, 2.9515,\n 3.0967, 3.2408, 3.1755, 3.1109, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.4058, 3.5446, 3.4806, 3.4171, 3.3542, 3.2918, 3.2299, 3.3665,\n 3.3049, 3.2437, 3.3789, 3.3181, 3.2577, 3.1977, 3.1382, 3.0792,\n 3.2124, 3.3447, 3.2857, 3.4170, 3.3582, 3.2998, 3.4298, 3.3717,\n 3.3140, 3.4428, 3.3853, 3.3282, 3.2715, 3.3989, 3.5256, 3.6515,\n 3.5946, 3.7196, 3.6629, 3.7870, 3.9104, 3.8538, 3.9762, 3.9198,\n 3.8638, 3.9853, 3.9294, 3.8740, 3.8189, 3.9392, 4.0589, 4.1779,\n 4.1226, 4.2409, 4.1859, 4.3033, 4.4202, 4.3652, 4.3106, 4.4265,\n 4.3721, 4.3180, 4.2642, 4.2108, 4.1576, 4.1048, 4.0522, 4.0000,\n 3.9481, 4.0622, 4.1758, 4.1239, 4.2369, 4.1851, 4.1337])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.998%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 3.2998, 3.7097, 4.0825, 4.4264,\n 4.7469, 5.0483, 4.6667, 4.9640, 5.2463, 4.9193, 4.6188, 4.9010, 5.1711,\n 5.4306, 5.1640, 5.4175, 5.6622, 5.4174, 5.1855, 4.9652, 4.7556, 5.0000,\n 5.2372, 5.0389, 5.2705, 5.4958, 5.3072, 5.1257, 4.9507, 5.1723, 5.3886,\n 5.6000, 5.8068, 6.0093, 5.8424, 6.0412, 6.2361, 6.4273, 6.6150, 6.4550,\n 6.6398, 6.4846, 6.6667, 6.5158, 6.6953, 6.5485, 6.7254, 6.5823, 6.7568,\n 6.6172, 6.7893, 6.9589, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557,\n 6.9282, 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 6.8458, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.2296, 7.3773,\n 7.2684, 7.4146, 7.3073, 7.4521, 7.5954, 7.7373, 7.6317, 7.7723, 7.6681,\n 7.8074, 7.9455, 7.8428, 7.9796, 7.8782, 8.0139, 7.9138, 8.0483, 7.9495,\n 8.0829, 7.9853, 8.1176, 8.0212, 8.1524, 8.2825, 8.1873, 8.0931, 8.2222,\n 8.1291, 8.2572, 8.3843, 8.2923, 8.2012, 8.1111, 8.0219, 8.1481, 8.2733,\n 8.1850, 8.3093, 8.4327, 8.3453, 8.2588, 8.1731, 8.0882, 8.2107, 8.1266,\n 8.2483, 8.3691, 8.4891, 8.4057, 8.5249, 8.4423, 8.5607, 8.4788, 8.5964,\n 8.5153, 8.6321, 8.5516, 8.6677, 8.5879, 8.7033, 8.8179, 8.7388, 8.6603,\n 8.7742, 8.6963, 8.8095, 8.9221, 8.8448, 8.7681, 8.6921, 8.6166, 8.7284,\n 8.8396, 8.7647, 8.8752, 8.9851, 8.9107, 8.8369, 8.7636, 8.8728, 8.9815,\n 9.0895, 9.0167, 8.9444, 8.8726, 8.9800, 9.0869, 9.1932, 9.1218, 9.2276,\n 9.1567, 9.2619, 9.1915, 9.2961, 9.2261, 9.3302, 9.2607, 9.3642, 9.2952,\n 9.3982, 9.5007, 9.4320, 9.3638, 9.4658, 9.3980, 9.4995, 9.6005, 9.5331,\n 9.4661, 9.3995, 9.3333, 9.4338, 9.5338, 9.4680, 9.5675, 9.6666, 9.6011,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Traditionally, the Brahui of the Raisani tribe are in charge of the law and order situation through the Pass area. This tribe is still living in present day Balochistan in Pakistan.\nHypothesis: The Raisani tribe resides in Pakistan.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.6083, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.9759, 1.1183, 1.0659, 1.0139, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 0.8645, 0.9979, 1.1305, 1.0820, 1.0338, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.9742, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 1.0788, 1.0328, 0.9870, 0.9415, 1.0670, 1.0215,\n 0.9763, 0.9313, 1.0555, 1.0106, 0.9659, 0.9215, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "48.7%", + "z-score": "7.74", + "p value": "5.16e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.5985, 4.8107, 5.0186, 4.8742, 4.7336, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.4832, 5.6401, 5.5435, 5.6986, 5.8522, 5.7566, 5.6622,\n 5.5691, 5.4772, 5.3865, 5.2970, 5.2086, 5.3594, 5.2719, 5.4212,\n 5.3345, 5.4822, 5.6285, 5.7735, 5.9172, 6.0596, 5.9732, 6.1143,\n 6.2541, 6.1685, 6.0838, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.5653, 6.6968, 6.8274, 6.7462,\n 6.6658, 6.5861, 6.5072, 6.4291, 6.3517, 6.4807, 6.4039, 6.5320,\n 6.6591, 6.5828, 6.5072, 6.4322, 6.3580, 6.2843, 6.2113, 6.1389,\n 6.2644, 6.1926, 6.3172, 6.2458, 6.3694, 6.4923, 6.6144, 6.7358,\n 6.8563, 6.7850, 6.9048, 7.0238, 6.9529, 6.8825, 6.8127, 6.7434,\n 6.8614, 6.7925, 6.9097, 6.8413, 6.9577, 7.0735, 7.1885, 7.3030,\n 7.4168, 7.5299, 7.4616, 7.3937, 7.3263, 7.2594, 7.1929, 7.1270,\n 7.2391, 7.1735, 7.2849, 7.3958, 7.3305, 7.2656, 7.2012, 7.1372,\n 7.0736, 7.0104, 6.9477, 7.0574, 6.9950, 7.1041, 7.0420, 7.1506,\n 7.2585, 7.3660, 7.4729, 7.5794, 7.5173, 7.6231, 7.7285, 7.6667,\n 7.6052, 7.5441, 7.4834, 7.5880, 7.5276, 7.6317, 7.7353])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The international humanitarian aid organization, Doctors Without Borders/Medecins Sans Frontieres (MSF), continues to treat victims of violence in all locations where it is present in Darfur.\nHypothesis: Doctors Without Borders is an international aid organization.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.9456, 1.1547,\n 1.0742, 1.2792, 1.4812, 1.6803, 1.5986, 1.7942, 1.9870, 1.9052,\n 2.0948, 2.2819, 2.2000, 2.3842, 2.5660, 2.7456, 2.6632, 2.8402,\n 2.7585, 2.9329, 3.1052, 3.0237, 3.1937, 3.3619, 3.5282, 3.4466,\n 3.6107, 3.5298, 3.6919, 3.8523, 3.7717, 3.9302, 4.0872, 4.2426,\n 4.1621, 4.3158, 4.2359, 4.3879, 4.5384, 4.4590, 4.6079, 4.7556,\n 4.9019, 4.8226, 4.9675, 5.1111, 5.0323, 5.1745, 5.3156, 5.2372,\n 5.3769, 5.5155, 5.6530, 5.5750, 5.7112, 5.6338, 5.7689, 5.9029,\n 5.8260, 5.9589, 6.0908, 6.2217, 6.1451, 6.2750, 6.1990, 6.3278,\n 6.4558, 6.3803, 6.5072, 6.6332, 6.7585, 6.6833, 6.8076, 6.7330,\n 6.8564, 6.9789, 6.9048, 7.0265, 7.1474, 7.2675, 7.1938, 7.3131,\n 7.2399, 7.3584, 7.4762, 7.4034, 7.5204, 7.6368, 7.7524, 7.6800,\n 7.7949, 7.7230, 7.8372, 7.9507, 7.8793, 7.9921, 8.1043, 8.2158,\n 8.1448, 8.2557, 8.1851, 8.2954, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.6603, 8.5905, 8.6978, 8.6284, 8.7351, 8.8413, 8.7724, 8.8780,\n 8.9830, 9.0876, 9.0190, 9.1230, 9.0549, 9.1584, 9.2613, 9.1936,\n 9.2960, 9.3980, 9.4995, 9.4321, 9.5331, 9.4661, 9.5666, 9.6667,\n 9.6000, 9.6996, 9.7987, 9.8974, 9.8311, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 10.9769, 10.8801, 10.7843, 10.8960, 10.8012, 10.7074, 10.8186,\n 10.7257, 10.8363, 10.7444, 10.8544, 10.7635, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.1184, 12.2178, 12.3167, 12.4150,\n 12.3309, 12.4289, 12.3455, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.6643, 12.7597, 12.6785, 12.5979, 12.6930, 12.7876, 12.8817,\n 12.8019, 12.8957, 12.8165, 12.9099, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.4234, 13.3473, 13.4371, 13.3615, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In a bowl, whisk together the eggs and sugar until completely blended and frothy.\nHypothesis: In a bowl, whisk together the egg, sugar and vanilla until light in color.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "2", + "Fraction of T in Greenlist": "12.5%", + "z-score": "-1.15", + "p value": "0.876", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "11", + "Fraction of T in Greenlist": "44.0%", + "z-score": "2.19", + "p value": "0.0141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In Nigeria, by far the most populous country in sub-Saharan Africa, over 2.7 million people are infected with HIV.\nHypothesis: 2.7 percent of the people infected with HIV live in Africa.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.8617, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 1.0141, 0.9567, 0.8997, 1.0541,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.4721, 0.5991, 0.5548, 0.6810,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.4525, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.3434, 6.1584, 5.9797, 5.8068, 5.6395, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.3472, 6.2164, 6.3890, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.0014, 7.1590, 7.0387, 6.9204, 6.8041,\n 6.9601, 6.8458, 7.0000, 6.8876, 7.0401, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.2684, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.7047, 7.6033, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.6976, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.9547, 9.8632, 9.9783, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.1253, 11.2316, 11.1435, 11.2493, 11.1621,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.4935, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.0341, 12.1335, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.4722,\n 12.5685, 12.6643, 12.7597, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.0821, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.0214, 14.1091, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A jury is slated to decide for the first time whether Jack Kevorkian, famed as \"Dr. Death,\" has violated Michigan's assisted-suicide ban, while the state continues to grapple with the issue of what to allow when the ill want to end their pain by ending their lives.\nHypothesis: Jack Kevorkian is the real name of \"Dr. Death\".\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "125", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "20.8%", + "z-score": "-1.08", + "p value": "0.861", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.0290, -1.0809, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.0844])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.5717, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.5621, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.0870, 1.9959, 2.1997, 2.1094, 2.0207,\n 2.2200, 2.1320, 2.0455, 1.9604, 2.1546, 2.3462, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.4667, 2.6491, 2.8292, 3.0071, 3.1829, 3.3566,\n 3.2717, 3.1879, 3.3587, 3.2757, 3.1937, 3.1129, 3.0330, 2.9542,\n 2.8764, 3.0429, 2.9656, 2.8893, 2.8138, 2.7393, 2.6656, 2.5927,\n 2.7552, 2.6828, 2.6112, 2.7713, 2.7001, 2.6296, 2.5600, 2.7175,\n 2.8735, 2.8039, 2.7349, 2.8889, 2.8203, 2.9726, 3.1236, 3.2733,\n 3.4217, 3.5689, 3.7148, 3.6452, 3.5762, 3.7205, 3.6519, 3.5839,\n 3.5166, 3.4499, 3.3838, 3.3182, 3.4599, 3.3947, 3.3301, 3.2660,\n 3.2025, 3.1395, 3.0770, 3.2161, 3.1539, 3.0923, 3.2299, 3.1685,\n 3.1076, 3.0471, 3.1831, 3.3181, 3.2577, 3.1977, 3.3314, 3.2717,\n 3.4042, 3.5359, 3.6667, 3.7966, 3.9258, 4.0541, 3.9936, 3.9337,\n 4.0608, 4.0011, 3.9418, 3.8829, 3.8244, 3.7664, 3.7087, 3.8341,\n 3.7766, 3.7196, 3.6629, 3.6067, 3.5508, 3.4953, 3.6188, 3.5635,\n 3.5085, 3.6310, 3.5762, 3.5218, 3.4677, 3.5890, 3.7097, 3.6556,\n 3.6019, 3.7216, 3.6680, 3.7869, 3.9052, 4.0228, 4.1399, 4.2563,\n 4.3721, 4.3180, 4.2642, 4.3792, 4.3256, 4.4399, 4.5535, 4.5000,\n 4.6130, 4.5596, 4.6720, 4.7838, 4.8950, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Take consumer products giant Procter and Gamble. Even with a $1.8 billion Research and Development budget, it still manages 500 active partnerships each year, many of them with small companies.\nHypothesis: Procter and Gamble spends $1.8 billion for Research and Development.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.0586, 0.1166, 0.2901, 0.4619, 0.4021, 0.3430, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.3303, 0.4932, 0.4364,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.2349, 0.3746,\n 0.5134, 0.6513, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, 0.0838, 0.2089, 0.3333,\n 0.4571, 0.5803, 0.5375, 0.4949, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.8890, 5.6614, 5.8889, 5.6737, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.4051, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.6339, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.0139, 8.9178, 8.8227, 8.9469,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.3951, 9.3042, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 9.8753, 9.9878, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 10.0647, 9.9807, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.5955, 10.7006, 10.8051, 10.9091, 11.0125, 10.9317, 11.0346, 11.1370,\n 11.0569, 11.1588, 11.0793, 11.0004, 10.9220, 11.0235, 11.1245, 11.2250,\n 11.1473, 11.2473, 11.3468, 11.2698, 11.3688, 11.4674, 11.3910, 11.4891,\n 11.5868, 11.5109, 11.6082, 11.7050, 11.8014, 11.8973, 11.9928, 12.0878,\n 12.0127, 12.1073, 12.2016, 12.1270, 12.2209, 12.1468, 12.0731, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Deceased U.S. soldiers and their effects were evacuated to Japan and then shipped home in refrigerated containers for interment in the U.S.\nHypothesis: The U.S. military evacuated U.S. citizens.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.3573, -0.4146, -0.4714,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.3714, -1.4093, -1.4471, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.1471, 1.0613, 1.2778, 1.4907, 1.7002, 1.6131, 1.5275, 1.7321,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.5542, 1.4765, 1.4000, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.3697, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.6164, 1.5483, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.8475, 2.0107, 2.1723, 2.1049, 2.0381,\n 1.9720, 1.9066, 1.8419, 1.7778, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.7154, 1.8682, 1.8071, 1.7465, 1.8974,\n 1.8370, 1.7772, 1.9261, 1.8665, 1.8074, 1.7488, 1.8956, 2.0412,\n 2.1858, 2.3293, 2.4717, 2.4121, 2.5532, 2.6933, 2.8324, 2.7724,\n 2.9103, 3.0471, 2.9872, 2.9277, 3.0632, 3.0039, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.2857, 3.4170, 3.5474, 3.4884, 3.4298, 3.3717,\n 3.3140, 3.2567, 3.3853, 3.5131, 3.6401, 3.5827, 3.5256, 3.4689,\n 3.4126, 3.3567, 3.3012, 3.2460, 3.1912, 3.1368, 3.0827, 3.2071,\n 3.1532, 3.0997, 3.2230, 3.3457, 3.2921, 3.2389, 3.3606, 3.3075,\n 3.2547, 3.3754, 3.3228, 3.4427, 3.5619, 3.5093, 3.4570, 3.4050,\n 3.5232, 3.6407, 3.7576, 3.7055, 3.6537, 3.6021, 3.5509, 3.5000,\n 3.4494, 3.3990, 3.3489, 3.2991, 3.2496, 3.3645, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Mice given a substance found in red wine lived longer despite a fatty diet, a study shows.\nHypothesis: Mice fed with red wine lived longer despite a fatty diet.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.3482, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.8889, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.2611, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 1.1547,\n 1.1028, 1.2423, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.5818, 1.7158, 1.6641, 1.6127, 1.5617, 1.6941, 1.8257,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.5055, 1.4570, 1.5848, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.7213, 1.6737, 1.6262, 1.7498, 1.7025,\n 1.8252, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 2.0470, 2.1667,\n 2.2857, 2.2384, 2.1913, 2.1444, 2.0979, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 4.6667,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.9962, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.3067, 5.1864, 5.0684, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.7242, 5.8835, 5.7812, 5.9386,\n 5.8377, 5.9932, 6.1471, 6.0474, 5.9491, 5.8522, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.1128, 6.0193, 6.1664, 6.0740, 6.2197, 6.1283,\n 6.2725, 6.1820, 6.0927, 6.2354, 6.1470, 6.2883, 6.2008, 6.3408,\n 6.2541, 6.3928, 6.3070, 6.4444, 6.5807, 6.4957, 6.4116, 6.3283,\n 6.2459, 6.3807, 6.5144, 6.4327, 6.3517, 6.4842, 6.4040, 6.5354,\n 6.4558, 6.5861, 6.5072, 6.6365, 6.5583, 6.4807, 6.6089, 6.5320,\n 6.6591, 6.5828, 6.7090, 6.6332, 6.7585, 6.6833, 6.8076, 6.9310,\n 6.8564, 6.7823, 6.7089, 6.6361, 6.7584, 6.8799, 7.0007, 6.9282,\n 6.8563, 6.9762, 6.9048, 7.0238, 7.1421, 7.0711, 7.0006, 7.1181,\n 7.0481, 7.1647, 7.0952, 7.2111, 7.1420, 7.2572, 7.1885, 7.3030,\n 7.4168, 7.3485, 7.2807, 7.2134, 7.1465, 7.2594, 7.3717, 7.3051,\n 7.2391, 7.3506, 7.2849, 7.3958, 7.3305, 7.4407, 7.3758, 7.4853,\n 7.4208, 7.3566, 7.4655, 7.4017, 7.5100, 7.4465, 7.5542, 7.4911,\n 7.5981, 7.5353, 7.6418, 7.7478, 7.6853, 7.6231, 7.5614, 7.5000,\n 7.6052, 7.7099, 7.8142, 7.7530, 7.8567, 7.9599, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Charles de Gaulle died in 1970 at the age of eighty. He was thus fifty years old when, as an unknown officer recently promoted to the (temporary) rank of brigadier general, he made his famous broadcast from London rejecting the capitulation of France to the Nazis after the debacle of May-June 1940.\nHypothesis: Charles de Gaulle died in 1970.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.1980,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.3205, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.5053, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.0817, 2.9814, 2.8830, 3.0796, 3.2733, 3.1754,\n 3.0793, 3.2691, 3.4562, 3.3607, 3.2667, 3.1743, 3.3574, 3.5382,\n 3.7166, 3.6242, 3.8000, 3.7087, 3.8819, 4.0531, 3.9624, 3.8730,\n 4.0415, 4.2080, 4.1192, 4.0316, 4.1957, 4.1090, 4.2710, 4.4313,\n 4.3451, 4.2601, 4.4182, 4.3339, 4.2507, 4.1684, 4.0872, 4.0069,\n 3.9276, 3.8492, 4.0038, 4.1569, 4.0788, 4.0016, 4.1528, 4.3027,\n 4.2258, 4.1497, 4.0745, 4.2222, 4.3687, 4.5140, 4.6580, 4.5826,\n 4.7252, 4.8666, 4.7916, 4.7173, 4.8572, 4.9960, 4.9221, 4.8488,\n 4.9862, 4.9135, 5.0496, 5.1848, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.0325, 4.9624, 4.8930, 4.8242, 4.7559, 4.6883, 4.8200, 4.9507,\n 4.8833, 4.8164, 4.9460, 5.0747, 5.0080, 4.9419, 4.8763, 5.0037,\n 5.1303, 5.2560, 5.3810, 5.3153, 5.4393, 5.5626, 5.4971, 5.4322,\n 5.5544, 5.6760, 5.6112, 5.5470, 5.6675, 5.6036, 5.7234, 5.8424,\n 5.7787, 5.7155, 5.8336, 5.7707, 5.7082, 5.6462, 5.5846, 5.5234,\n 5.4626, 5.4023, 5.5189, 5.6349, 5.5747, 5.5149, 5.6300, 5.7446,\n 5.6849, 5.6256, 5.5668, 5.6804, 5.7934, 5.9059, 5.8470, 5.9588,\n 5.9002, 6.0113, 6.1219, 6.0635, 6.0054, 6.1153, 6.2246, 6.1667,\n 6.1091, 6.2177, 6.1604, 6.2684, 6.3758, 6.3187, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Teenage sensation Wayne Rooney powered England into the quarter-finals of Euro 2004 with two goals in Monday's 4-2 defeat of Croatia and they were joined in the last eight by champions France who beat Switzerland 3-1.\nHypothesis: France participates in Euro 2004.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 1.9215, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.6662, 0.6199, 0.7506, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.7789, 0.7336, 0.8607, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.9659, 0.9215, 0.8773, 1.0000,\n 1.1221, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 4.1851, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.4444, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132,\n 6.8641, 7.0133, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.0483, 8.1816, 8.0829, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 11.0371, 11.1435, 11.0562, 10.9697,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 12.1489, 12.0660, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.5979, 12.6930, 12.6130, 12.7077,\n 12.8019, 12.7226, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.4744, 13.3967, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.7801, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Fujimori charged that on January 26, 1995, Ecuador fired the first shot, an allegation denied by Ecuador's leader, Sixto Duran-Ballen. Predictably, each side blamed the other for starting the 1995 conflict, just as each pointed the finger of guilt to the other for provoking the border war of 1941, when Peru took most of the 120,000 square miles in contention between the two countries.\nHypothesis: President Fujimori was re-elected in 1995.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "43.8%", + "z-score": "3.72", + "p value": "0.000101", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.0792, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.1027, 3.3147, 3.2026, 3.4101, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.0817, 3.2796, 3.4743, 3.6662, 3.5642, 3.7528,\n 3.9386, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.6315, 3.8103,\n 3.7166])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "158", + "Fraction of T in Greenlist": "79.4%", + "z-score": "17.7", + "p value": "1.43e-70", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.4306, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.1143, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.6615, 7.4838, 7.6594, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.0017, 8.1654, 8.3267, 8.4857, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.0924, 9.2388, 9.3834, 9.5263,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.3314, 10.4614, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 10.9634, 11.0870, 11.2094, 11.3308, 11.2036, 11.3244,\n 11.1994, 11.3196, 11.4388, 11.5570, 11.6743, 11.5525, 11.6693, 11.7851,\n 11.9001, 11.7809, 11.8953, 12.0089, 12.1216, 12.2336, 12.3447, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.5531, 12.6611, 12.7683, 12.8749,\n 12.9807, 13.0859, 13.1904, 13.0798, 13.1839, 13.2873, 13.3902, 13.2816,\n 13.3840, 13.4859, 13.5871, 13.6878, 13.7878, 13.8873, 13.7813, 13.8804,\n 13.9790, 14.0771, 13.9728, 14.0705, 14.1677, 14.2644, 14.3605, 14.4562,\n 14.5513, 14.4493, 14.5442, 14.6385, 14.7324, 14.6319, 14.7255, 14.8187,\n 14.9113, 15.0035, 15.0952, 15.1865, 15.0882, 15.1792, 15.2698, 15.3600,\n 15.2631, 15.3530, 15.4425, 15.5316, 15.6203, 15.7086, 15.7965, 15.7014,\n 15.7890, 15.8763, 15.9632, 15.8694, 15.9561, 16.0424, 16.1283, 16.2139,\n 16.2990, 16.3839, 16.2917, 16.3764, 16.4607, 16.5446, 16.4536, 16.5374,\n 16.6208, 16.7039, 16.7866, 16.8690, 16.9511, 16.8616, 16.9435, 17.0251,\n 17.1064, 17.0180, 17.0991, 17.1799, 17.2604, 17.3406, 17.4204, 17.5000,\n 17.4130, 17.4925, 17.5716, 17.6504, 17.5644, 17.6431, 17.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Hepburn's platinum, diamond and sapphire brooch had been estimated to fetch just $20,000, but sold for $120,000, six times its estimated price.\nHypothesis: Hepburn's diamond and sapphire brooch fetched $120,000.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547, 0.9802, 0.8165,\n 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.3464, 0.2265, 0.5556,\n 0.4364, 0.3216, 0.2108, 0.5185, 0.4082, 0.3015, 0.5941, 0.4880, 0.3849,\n 0.6644, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.6963, 0.6025,\n 0.8513, 0.7579, 0.6667, 0.5774, 0.8165, 0.7276, 0.9608, 1.1896, 1.4142,\n 1.3234, 1.5430, 1.4530, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.5275,\n 1.4434, 1.6471, 1.5635, 1.7634, 1.6803, 1.8766, 1.7942, 1.9870, 2.1773,\n 2.3651, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656, 1.9887,\n 1.9127, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771, 1.4076, 1.3389,\n 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428, 0.8793, 0.8165, 0.7543,\n 0.6928, 0.6319, 0.8003, 0.7395, 0.9058, 0.8452, 0.7851, 0.7256, 0.6667,\n 0.8295, 0.7707, 0.7124, 0.6547, 0.5974, 0.5407, 0.6999, 0.8577, 0.8006,\n 0.7441, 0.6880, 0.6325, 0.7873, 0.7318, 0.6768, 0.8296, 0.7746, 0.9258,\n 0.8709, 0.8165, 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 1.0973, 1.0435,\n 1.1882, 1.1345, 1.2778, 1.4201, 1.5614, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.1905, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366, 0.8868,\n 0.8374, 0.7884, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129, 1.0465, 0.9979,\n 0.9497, 1.0820, 1.0338, 1.1651, 1.1169, 1.2472, 1.1991, 1.3284, 1.4570,\n 1.5848, 1.5363, 1.4881, 1.4402, 1.3926, 1.3453, 1.2982, 1.2514, 1.2049,\n 1.1587, 1.1127, 1.0670, 1.0215, 0.9763, 0.9313, 0.8866, 1.0106, 0.9659,\n 1.0890, 1.0444, 1.0000, 1.1221, 1.0777, 1.0336, 1.1547, 1.1106, 1.2309,\n 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.4444, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.5234, 7.4146, 7.5593, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.7927, 8.9178, 9.0419, 8.9469,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.1561, 9.2760,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.1553, 9.2729, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.4474, 9.3617, 9.4763, 9.3915, 9.5054,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.4341, 10.3544, 10.4596, 10.3805, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.6397, 10.7423, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.6404, 10.5654, 10.6665, 10.7671, 10.8673, 10.7928,\n 10.8925, 10.8186, 10.9178, 10.8444, 10.7714, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.7120, 11.8056, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Huckaby voluntarily submitted herself to questioning Friday night at the Tracy police station, and was arrested less than six hours later. She now resides in the San Joaquin County Jail without bond, awaiting an arraignment hearing on Tuesday. On April 6, the body of Sandra Cantu was discovered stuffed inside the 28-year-old's suitcase at the bottom of a pond a few miles away from her home. The two were neighbors in the Orchard Estates Mobile Home Park and Huckaby's own 5-year-old daughter often played with Cantu. Autopsy results are still pending.\nHypothesis: Huckaby is accused of killing Sandra Cantu.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.1784, -1.0445,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "143", + "Fraction of T in Greenlist": "71.9%", + "z-score": "15.3", + "p value": "6.45e-53", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 6.7254, 6.8995, 7.0711, 6.9286, 7.0980,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 8.0829,\n 8.2353, 8.1016, 8.2525, 8.1216, 8.2711, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.2952,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.4858, 9.6156, 9.7442, 9.8716, 9.9980, 10.1234, 10.2476, 10.1352,\n 10.2587, 10.1479, 10.2706, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.5363, 10.6547, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 11.1291,\n 11.0254, 11.1392, 11.0368, 11.1500, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.7130, 11.8212, 11.9288, 11.8299, 11.9370, 11.8392,\n 11.9457, 11.8491, 11.9551, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.5930, 12.6939, 12.6012, 12.7017,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.3128, 13.4086, 13.3201, 13.4155, 13.3279,\n 13.4230, 13.3361, 13.4308, 13.5250, 13.6188, 13.7122, 13.8051, 13.8976,\n 13.9896, 14.0813, 13.9959, 14.0872, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.4651, 14.5535, 14.6416,\n 14.7293, 14.6473, 14.7348, 14.6534, 14.7406, 14.6599, 14.7468, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.2659])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: CAMDEN, N.J. (Reuters) \u2014 Three Muslim brothers from Albania were sentenced to life in prison on Tuesday for a plot to kill American soldiers at the Fort Dix military base, which prosecutors said was inspired by the idea of holy war against the United States. The men, Dritan Duka, 30, Shain Duka, 28, and Eljvir Duka, 25, all illegal immigrants, were each sentenced to life without the possibility of parole. The three, who operated a roofing business in Cherry Hill, N.J., were among five foreign-born Muslims convicted in December of planning an attack at the base, about 40 miles east of Philadelphia. The attack was never carried out.\nHypothesis: Eljvir Duka comes from Albania.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.3937, -1.4757, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.8709, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.1183, 1.0659, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.8755, 1.0105, 0.9615, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 1.0338, 0.9858, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 1.0445,\n 1.1717, 1.1251, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.2244, 1.3474, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "5", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "60.0%", + "z-score": "1.81", + "p value": "0.0354", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A number of the items that he auctioned off over the two day period have been seen on his reality TV show The Osbournes, which featured home life with Sharon, Ozzy and their two children. Amongst some of the higher-priced items were a carved walnut Victorian-style custom built pool table which raised $11,250, a painting from Edourad Drouot which fetched $10,500, a pair of Ozzy's famous round glasses which raised $5,250 and a dog bed given to Sharon by Elton John which sold for $2,375.\nHypothesis: \"The Osbournes\" is the name of a reality show starring Ozzy Osbourne.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.4746, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.0516, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.3849,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.7493,\n 0.7001, 0.8374, 0.9739, 0.9245, 0.8755, 1.0105, 1.1447, 1.2780,\n 1.2285, 1.3608, 1.4923, 1.4427, 1.3933, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.3284, 1.4570, 1.4087, 1.3607, 1.3131, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.5621, 1.5159, 1.6378, 1.5916, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 2.6681, 2.8943, 3.1160, 3.0000,\n 2.8868, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 3.9595, 4.1461, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.3788, 4.5544, 4.7278, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.2615, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.5435, 5.6986, 5.8522, 5.7566, 5.6622,\n 5.8139, 5.7207, 5.6286, 5.7785, 5.9270, 5.8358, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.0044, 5.9172, 5.8310, 5.9732, 6.1143,\n 6.0288, 5.9442, 6.0838, 6.2222, 6.3595, 6.4957, 6.6308, 6.5465,\n 6.6804, 6.5970, 6.5144, 6.6471, 6.7788, 6.9094, 6.8274, 6.9570,\n 6.8757, 7.0043, 7.1319, 7.0513, 6.9714, 6.8922, 6.8138, 6.9402,\n 7.0658, 6.9879, 6.9107, 7.0353, 7.1590, 7.2818, 7.4039, 7.5251,\n 7.4483, 7.5687, 7.4924, 7.4168, 7.5364, 7.6551, 7.7732, 7.6980,\n 7.8153, 7.7407, 7.8571, 7.9729, 7.8988, 7.8253, 7.7524, 7.6800,\n 7.7949, 7.9091, 7.8372, 7.7658, 7.8793, 7.9921, 8.1043, 8.2158,\n 8.3268, 8.2557, 8.3660, 8.2954, 8.2252, 8.3349, 8.4439, 8.5524,\n 8.4826, 8.5905, 8.5212, 8.6284, 8.7351, 8.6662, 8.5978, 8.5298,\n 8.4623, 8.5683, 8.6738, 8.6066, 8.5399, 8.6448, 8.7492, 8.8531,\n 8.9565, 9.0593, 8.9929, 9.0952, 9.0292, 8.9635, 9.0653, 9.1667,\n 9.2676, 9.2022, 9.3026, 9.2376, 9.3375, 9.4370, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: According to reports, a man protesting the G20 Summit in London, England has died after collapsing at a protester camp. Sky News says the man collapsed on the street inside a camp close to the Bank of England and when found he was still breathing, but efforts by paramedics to rescue him failed and he was pronounced dead at an area hospital. The name of the person and cause of death are not yet known, but several people were injured earlier in the day. It is also reported by Sky News that people threw bottles at him and authorities when they were taking him to a waiting ambulance.\nHypothesis: Sky News offices are close to the Bank of England.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, 0.0529, 0.0000,\n 0.1575, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.3073, 0.4377, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.3369, 0.2940, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.6307, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.4866, 6.3723, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 7.9630, 7.8699, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.1615, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.5638, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 10.0342, 9.9524, 10.0611, 10.1692, 10.2766, 10.1955, 10.1151,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.7159, 10.8170, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.1886, 11.2864, 11.3837, 11.4806, 11.4068,\n 11.3335, 11.2607, 11.3572, 11.4533, 11.3809, 11.3091, 11.2376, 11.3333,\n 11.4286, 11.5235, 11.6179, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The San Diego Padres ace, Jake Peavy, was hurt in an 8-5 loss to the St. Louis Cardinals.\nHypothesis: The San Diego Padres won the game against the St. Louis Cardinals.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "22.0%", + "z-score": "-0.719", + "p value": "0.764", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.0954,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "47.7%", + "z-score": "5.47", + "p value": "2.19e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.5590, 3.7905, 4.0166,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.3894, 4.2563, 4.1265, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 4.8394, 4.7281, 4.6188,\n 4.7980, 4.6904, 4.5847, 4.4809, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.0017, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.2915, 5.1977, 5.1051, 5.0138, 5.1698,\n 5.3243, 5.2338, 5.1444, 5.0562, 5.2086, 5.3594, 5.5088, 5.6569,\n 5.8035, 5.7155, 5.8606, 5.7735, 5.6874, 5.6023, 5.7457, 5.6614,\n 5.5780, 5.4956, 5.6373, 5.5556, 5.4747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Despite CNOOC's all-cash bid, Unocal said its recommendation to shareholders in favor of the $16.4 billion offer of cash and stock from Chevron remains in effect.\nHypothesis: Unocal said it would evaluate the CNOOC offer.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.9428,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.3213, 1.2577, 1.4222, 1.3587,\n 1.2959, 1.4580, 1.3954, 1.5556, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.5945, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.3943, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.0284, 1.1711, 1.1183, 1.0659, 1.0139, 0.9623,\n 0.9110, 1.0512, 1.1905, 1.1390, 1.2771, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.6641, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.7746, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.6036,\n 1.5544, 1.6827, 1.8102, 1.7609, 1.7119, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.2566, 1.3786, 1.3333,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.5590, 3.4207, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.3566, 3.5753, 3.7897, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 4.8857, 5.0684, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.2000, 6.0928, 5.9874, 5.8835, 5.7812, 5.6804,\n 5.5811, 5.7382, 5.6401, 5.5435, 5.4482, 5.6032, 5.7566, 5.9084,\n 6.0587, 6.2075, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.4153, 6.5569, 6.6973, 6.8364, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.2900, 7.4233, 7.3333, 7.4655, 7.3765, 7.5076, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.8463, 7.7598, 7.6742, 7.5895,\n 7.5056, 7.4225, 7.3402, 7.2587, 7.3845, 7.3037, 7.2236, 7.1443,\n 7.2691, 7.3930, 7.5161, 7.4373, 7.3592, 7.2818, 7.2051, 7.1291,\n 7.2510, 7.3721, 7.4924, 7.6120, 7.5364, 7.6551, 7.7732, 7.8905,\n 7.8153, 7.9318, 8.0476, 8.1628, 8.2772, 8.3910, 8.3162, 8.4293,\n 8.3550, 8.4674, 8.3937, 8.5054, 8.6165, 8.7270, 8.6537, 8.7636,\n 8.6908, 8.6186, 8.5469, 8.4757, 8.4050, 8.3349, 8.2652, 8.3742,\n 8.3050, 8.2362, 8.1679, 8.2762, 8.3840, 8.3161, 8.4232, 8.5298,\n 8.6359, 8.5683, 8.6738, 8.7788, 8.8832, 8.9872, 8.9199, 9.0233,\n 8.9565, 9.0593, 9.1617, 9.0952, 9.1971, 9.2986, 9.3995, 9.3333,\n 9.4338, 9.3680, 9.3026, 9.4026, 9.3375, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: U.S. forces have been engaged in intense fighting after insurgents launched simultaneous attacks in several Iraqi cities, including Fallujah and Baqubah.\nHypothesis: Fallujah and Baqubah are Iraqi cities.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "30.8%", + "z-score": "1.07", + "p value": "0.141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "46.7%", + "z-score": "7.08", + "p value": "7.19e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.4371, 2.6681, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.3147, 3.2026, 3.0929, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.0817, 3.2796, 3.1787, 3.0796, 2.9823, 2.8868,\n 3.0793, 3.2691, 3.4562, 3.6407, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.3558, 3.5333, 3.7087, 3.8819, 4.0531, 3.9624, 4.1312,\n 4.0415, 3.9530, 3.8657, 3.7796, 3.6947, 3.6109, 3.7758, 3.9389,\n 3.8555, 4.0166, 3.9340, 3.8523, 3.7717, 3.6920, 3.8503, 4.0069,\n 4.1621, 4.3158, 4.2359, 4.3879, 4.3086, 4.2303, 4.1528, 4.0762,\n 4.2258, 4.3740, 4.5210, 4.6667, 4.5899, 4.7341, 4.6580, 4.5826,\n 4.5079, 4.4341, 4.5762, 4.7173, 4.8572, 4.9960, 4.9221, 5.0596,\n 4.9862, 4.9135, 4.8414, 4.7700, 4.9058, 5.0406, 5.1744, 5.3072,\n 5.2358, 5.3675, 5.2965, 5.2262, 5.1564, 5.0873, 5.2175, 5.3468,\n 5.4752, 5.6028, 5.5336, 5.6602, 5.5915, 5.5233, 5.4557, 5.3886,\n 5.5138, 5.6383, 5.7619, 5.8848, 5.8177, 5.9397, 5.8730, 5.8068,\n 5.7411, 5.6760, 5.7967, 5.9168, 6.0362, 6.1548, 6.0897, 6.2075,\n 6.1427, 6.0784, 6.0145, 5.9510, 6.0678, 6.1839, 6.2994, 6.4143,\n 6.3509, 6.4650, 6.4019, 6.3392, 6.2770, 6.2152, 6.3283, 6.4409,\n 6.5528, 6.6642, 6.6024, 6.7132, 6.6517, 6.5906, 6.5299, 6.4695,\n 6.5794, 6.6887, 6.7974, 6.9056, 6.8454, 6.9530, 6.8930, 6.8333,\n 6.7740, 6.7151, 6.8219, 6.9282, 7.0340, 7.1393, 7.0804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: But Huawei says that expansion has not been easy - obtaining visas for its Chinese engineers to work on long-term projects in India being a particular challenge.\nHypothesis: Chinese engineers working on long-term projects in India can easily obtain visas.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.4792, -1.5206, -1.5617, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.6496, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.5333, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.8353, 10.7362, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.3091, 11.2127, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.7647, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.6287, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 12.9527, 12.8661, 12.9628,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.4390, 13.3537, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.7434, 13.6604, 13.7521,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.2046, 14.1227, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.5726, 14.4923, 14.4126, 14.5000,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.9318, 14.8530, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: According to Becky Gibbons of New York State Police and Chris Collins County Executive in Erie County, New York, the total number of fatalities is 50, including 45 passengers, four crew members and a person on the ground, while a woman and daughter on the ground were injured, near the edge of farmland, about seven miles from Buffalo Niagara International Airport.\nHypothesis: A daily commuter flight crashed in Buffalo.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.6590, 1.5396, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.9795, 1.8728, 2.1054, 2.0000,\n 1.8970, 1.7963, 2.0211, 1.9215, 2.1412, 2.0428, 1.9462, 2.1602,\n 2.0647, 2.2743, 2.1798, 2.3851, 2.2916, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 2.0135, 1.9333, 1.8543, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.2999, 1.4771,\n 1.4076, 1.3389, 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142,\n 1.3483, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -0.8422, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321, 2.1004, 2.4495,\n 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570, 2.6558, 2.4910, 2.3333,\n 2.1822, 2.4659, 2.3190, 2.5924, 2.4495, 2.3116, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.0605, 1.9415, 1.8257, 1.7132, 1.9599, 1.8489, 2.0889, 2.3238,\n 2.5538, 2.7791, 3.0000, 3.2167, 3.1027, 3.3147, 3.2026, 3.4101, 3.6141,\n 3.8146, 3.7033, 3.9001, 4.0937, 3.9837, 4.1740, 4.3614, 4.5461, 4.7281,\n 4.6188, 4.7980, 4.6904, 4.8669, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 5.1978, 5.0990, 5.0017, 4.9058, 4.8113,\n 4.9731, 5.1332, 5.2915, 5.4482, 5.3541, 5.2614, 5.1698, 5.0795, 5.2338,\n 5.3865, 5.2970, 5.2086, 5.1212, 5.2719, 5.1855, 5.3345, 5.2489, 5.1643,\n 5.0807, 4.9980, 5.1450, 5.0630, 5.2085, 5.3526, 5.4956, 5.6373, 5.5556,\n 5.6959, 5.8351, 5.9732, 5.8919, 5.8114, 5.9481, 6.0837, 6.2183, 6.1382,\n 6.0590, 6.1923, 6.3246, 6.4558, 6.5861, 6.7155, 6.6365, 6.5583, 6.6865,\n 6.8138, 6.9402, 7.0658, 7.1904, 7.1125, 7.2363, 7.3592, 7.4813, 7.6026,\n 7.7232, 7.6456, 7.7653, 7.6883, 7.8072, 7.9253, 8.0427, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.4664, 8.5796, 8.5041, 8.6166, 8.5417,\n 8.6535, 8.5792, 8.5054, 8.4322, 8.5433, 8.4706, 8.3984, 8.3268, 8.2557,\n 8.1851, 8.1150, 8.2252, 8.1556, 8.2652, 8.3742, 8.4826, 8.5905, 8.6978,\n 8.8045, 8.7351, 8.8413, 8.7724, 8.8780, 8.9830, 9.0876, 9.0190, 9.1230,\n 9.2265, 9.1584, 9.2613, 9.3638, 9.4658, 9.5673, 9.4995, 9.6005, 9.5331,\n 9.6336, 9.5666, 9.5000, 9.4338, 9.5338, 9.6334, 9.7325, 9.6666, 9.6011,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A smaller proportion of Yugoslavia's Italians were settled in Slovenia (at the 1991 national census, some 3000 inhabitants of Slovenia declared themselves as ethnic Italians).\nHypothesis: Slovenia has 3,000 inhabitants.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.1237, -0.9858, -1.0276, -1.0690,\n -1.1103, -0.9742, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "200", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.0%", + "z-score": "13.4", + "p value": "3.43e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.3231, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 3.4017, 3.2004, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.0410, 5.9386,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.4993, 6.4008,\n 6.3035, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.3485, 7.4853, 7.3901, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.9048, 11.0102, 11.1151, 11.2194, 11.1352, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.3812, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.3263, 12.2467, 12.3428, 12.2638, 12.3595,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.4109, 12.5049, 12.5986,\n 12.6918, 12.7847, 12.7082, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.1063, 13.1966, 13.2864, 13.2118, 13.3014, 13.3905])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Eric Harris and Dylan Klebold, seniors at the suburban Denver school, detonated homemade bombs and opened fire with shotguns, a rifle and a semiautomatic handgun on April 20, 1999. They killed a teacher and 12 students and wounded 23 others before committing suicide. The massacre shocked the country like no other. It was the worst school assault in American history at that time, and it came in the wake of a half-dozen others. It played out on live television, watched by millions. And it represented the violent destruction of a cherished American idea: that schools in the suburbs and the countryside were havens of peace and safety.\nHypothesis: 13 persons were killed by two students in 1999.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.4376, -0.4845, -0.3380, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, 0.0447, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.1684, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.2879, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.0632, 8.2035, 8.0934, 7.9849, 8.1240,\n 8.0171, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.5448, 8.6747, 8.8036, 8.7039, 8.6053,\n 8.7333, 8.6359, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.9469,\n 9.0702, 8.9763, 8.8833, 9.0057, 8.9138, 8.8228, 8.9444, 8.8544,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.4185, 9.3320, 9.4474, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.5346, 9.6471, 9.7590, 9.8702, 9.9807, 10.0906, 10.0074,\n 9.9249, 10.0342, 9.9524, 9.8712, 9.9800, 9.8995, 9.8197, 9.9278,\n 10.0353, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.4594, 10.3827, 10.4858, 10.4097, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.5725, 10.6722, 10.5998, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 10.9480, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.4765, 11.4065, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: 47-year-old Susan Boyle from Blackburn, West Lothian in Scotland, made her debut appearance on the show on Saturday night by saying that she had \"never been married, never been kissed\" and was currently unemployed, living alone at home with her cat, Pebbles. She says that she wants to \"be a professional singer\", but has \"never been given the chance.\" Audience members and judges Amanda Holden, Simon Cowell and Piers Morgan, first laughed and even poked fun at her. Boyle then stunned the judges and audience, getting a standing ovation, with her performance of I Dreamed a Dream from the award winning musical performance Les Mis\u00e9rables. Cowell called her performance \"extraordinary\" while Morgan called it \"the biggest surprise I've had in three years of this show. I am shocked.\" Holden even admitted that \"everyone was against you [Boyle]\" and that \"we were all being very cynical.\"\nHypothesis: Simon Cowell fell in love with Susan Boyle.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "19.3%", + "z-score": "-1.2", + "p value": "0.886", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -1.2041])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.2563, 4.1265, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.1326, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.1065, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.5630, 5.7242, 5.6220, 5.7812, 5.6804,\n 5.8377, 5.7382, 5.6401, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 6.0587, 6.2075, 6.1128, 6.2601, 6.1664, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.9488, 6.0927, 6.2354, 6.1470, 6.2883, 6.4283, 6.5672,\n 6.7049, 6.8414, 6.9768, 6.8889, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.2904, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.9254, 7.8406, 7.7566, 7.8808, 8.0042, 8.1266, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.6190, 8.5424, 8.6556, 8.7681, 8.6921, 8.6166,\n 8.5417, 8.4674, 8.3937, 8.3205, 8.2479, 8.3595, 8.4706, 8.3984,\n 8.3268, 8.2557, 8.3660, 8.2954, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.9447, 9.0510, 9.1567, 9.0863, 9.0164, 8.9469, 9.0520,\n 8.9830, 9.0876, 9.0190, 8.9509, 8.8832, 8.9872, 9.0906, 9.1936,\n 9.2960, 9.2287, 9.3306, 9.2637, 9.3651, 9.4661, 9.3995, 9.3333,\n 9.4338, 9.3680, 9.4680, 9.4026, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: On October 1 2001, EU and other countries introduced the option for domestic animal owners to apply for Pet passports under the Pets Travel Scheme (PETS for short), for pets returning from abroad to the United Kingdom. This replaced the old system of 6 months compulsory quarantine for all domestic pets.\nHypothesis: In 2001, the EU introduced a six-month compulsory quarantine for all domestic pets.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.6997, 1.6239, 1.8074,\n 1.7321, 1.9127, 1.8378, 2.0158, 2.1918, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 2.0954, 2.2629, 2.1917, 2.1213,\n 2.0517, 1.9829, 2.1470, 2.0785, 2.2405, 2.1723, 2.3324, 2.2646,\n 2.4228, 2.5796, 2.7349, 2.8889, 2.8203, 2.7524, 2.6852, 2.6186,\n 2.7699, 2.7037, 2.8534, 2.7875, 2.7222, 2.6575, 2.5934, 2.7406,\n 2.6768, 2.8226, 2.7591, 2.9035, 2.8402, 2.9832, 3.1251, 3.2660,\n 3.4058, 3.3420, 3.2788, 3.2161, 3.1539, 3.0923, 3.2299, 3.3665,\n 3.3049, 3.2437, 3.1831, 3.1229, 3.2577, 3.1977, 3.3314, 3.2717,\n 3.4042, 3.3447, 3.4762, 3.6068, 3.7366, 3.6770, 3.6178, 3.5590,\n 3.5007, 3.4428, 3.3853, 3.5131, 3.6401, 3.5827, 3.5256, 3.4689,\n 3.4126, 3.5382, 3.4821, 3.6067, 3.5508, 3.6745, 3.6188, 3.7417,\n 3.8638, 3.9853, 4.1061, 4.0501, 3.9945, 3.9392, 3.8843, 4.0038,\n 3.9491, 4.0678, 4.0132, 3.9590, 3.9052, 3.8516, 3.9691, 3.9158,\n 4.0325, 3.9793, 4.0953, 4.0423, 4.1576, 4.2723, 4.3864, 4.5000,\n 4.4468, 4.3938, 4.3412, 4.2889, 4.2369, 4.3492, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "63.5%", + "z-score": "8.2", + "p value": "1.17e-16", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 4.0814, 3.9337, 3.7905, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Longest Day ever lengthens. The 25th anniversary celebration of the first Normandy landing lasted three days; the 50th will spread out over a year.\nHypothesis: 50th Anniversary of Normandy Landings lasts a year.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.4403, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 1.1547, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.0507, 0.9909, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.9152, 0.8577, 0.8006, 0.7441, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.7746, 0.7201, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.2136, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.6336, 1.5848, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.3453, 1.4713, 1.4241, 1.3771, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.5621, 1.5159, 1.4699, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.3221, 3.5590, 3.4207, 3.2863,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 4.8038, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.9204, 6.8041,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.7373, 7.6317,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.0822, 7.9796, 7.8782, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.9586, 9.0845, 8.9861, 9.1111, 9.2351, 9.3582, 9.4803, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 9.7912, 9.6977,\n 9.8150, 9.9315, 10.0472, 9.9547, 9.8632, 9.7725, 9.6828, 9.5938,\n 9.5057, 9.6210, 9.7356, 9.6484, 9.5620, 9.4763, 9.3915, 9.5054,\n 9.6186, 9.7312, 9.8430, 9.7590, 9.8702, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.9940, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.7415, 10.8421, 10.7671, 10.8673, 10.7928,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 11.0414, 11.1392, 11.2366,\n 11.1637, 11.0913, 11.1883, 11.1164, 11.0450, 10.9740, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Qin (from which the name China is derived) established the approximate boundaries and basic administrative system that all subsequent dynasties were to follow.\nHypothesis: Qin Shi Huang was the first Chinese Emperor.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.1790, -1.0499, -1.0890, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 3.1844, 3.4207, 3.2863,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.5968, 4.8003, 4.6667,\n 4.5363, 4.4091, 4.2848, 4.1633, 4.3618, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.7980, 4.6904, 4.5847, 4.4809, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.0332, 5.1978, 5.3605, 5.5213, 5.4222,\n 5.3245, 5.2281, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.8139, 5.9641, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 7.1110, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.1111, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.3322, 7.4622, 7.3758, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.7976, 7.7152, 7.8384, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.5516, 8.4718, 8.3927, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.5052, 8.4286, 8.5424, 8.6556, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 9.1357, 9.2450, 9.3537, 9.4619, 9.5695, 9.6764,\n 9.6008, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.1106, 10.0371, 10.1398, 10.2419, 10.3435, 10.4447,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.5998, 10.5278, 10.6271, 10.7258,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.1218, 11.2171, 11.3120, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Hands Across the Divide was formed in March 2001, and one of its immediate aims was to press for more freedom of contact and communication right away between the two parts of Cyprus, and for early progress towards a solution to 'the Cyprus problem'.\nHypothesis: Cyprus was divided into two parts in March 2001.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -0.9115, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.9492, -0.9933, -1.0371, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.9742, -1.0155, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.7905, 3.9837, 3.8759, 4.0657, 3.9595, 4.1461, 4.3301,\n 4.2251, 4.1219, 4.3026, 4.4809, 4.3788, 4.2784, 4.1797, 4.0825,\n 3.9869, 4.1612, 4.3333, 4.5034, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.4482, 5.6032, 5.7566, 5.9084,\n 6.0587, 5.9641, 6.1128, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.9768, 7.1111, 7.2443, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.6328, 7.7598, 7.8859, 8.0111,\n 8.1354, 8.0497, 7.9649, 7.8808, 8.0042, 7.9209, 7.8384, 7.7567,\n 7.6758, 7.7981, 7.7178, 7.8393, 7.9600, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.2118, 9.3212, 9.4299, 9.3537, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.6635, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.1981, 10.1262, 10.2273, 10.1558, 10.2565, 10.1855, 10.1149, 10.2151,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The IOC meeting will also review reports submitted by the organizing committees of the 1998 Winter Olympic Games in Nagano, Japan; the 2000 Summer Olympic Games in Sydney, Australia and the 2002 Winter Olympic Games in the Salt Lake City, the United States, respectively.\nHypothesis: Before Salt Lake City, Winter Olympic Games took place in Nagano.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "201", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.4%", + "z-score": "0.774", + "p value": "0.22", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.7746, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, -0.0695, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.3303, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, 0.0000, -0.0501, 0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.3311, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.4593, 0.4121, 0.3651,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.5740, 0.7044, 0.6584, 0.7878, 0.9165, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.8866, 1.0106, 0.9659, 0.9215, 0.8773, 1.0000,\n 0.9558, 1.0777, 1.0336, 0.9897, 0.9461, 0.9027, 0.8595, 0.8165,\n 0.7737])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415, 4.3409, 4.0825,\n 4.3710, 4.1312, 4.4096, 4.1851, 3.9727, 3.7712, 4.0415, 3.8497, 4.1111,\n 3.9279, 3.7524, 3.5839, 3.8367, 3.6742, 3.5176, 3.7626, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.4207, 3.6515, 3.8772, 3.7417, 3.6098, 3.8297, 3.7009,\n 3.9158, 3.7897, 4.0000, 4.2064, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426,\n 4.1260, 4.0119, 4.2060, 4.3970, 4.2844, 4.1740, 4.3614, 4.5461, 4.7281,\n 4.9075, 4.7980, 4.6904, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.1711,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.5213, 5.6804, 5.8377,\n 5.7382, 5.6401, 5.7955, 5.6986, 5.6032, 5.7566, 5.9084, 5.8139, 5.7207,\n 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.1283, 6.2725, 6.4153, 6.5569,\n 6.6973, 6.8364, 6.7456, 6.8834, 6.7937, 6.9303, 6.8414, 6.7536, 6.6667,\n 6.8019, 6.7159, 6.8500, 6.7648, 6.6804, 6.5970, 6.7298, 6.6471, 6.5653,\n 6.6968, 6.8274, 6.9570, 6.8757, 6.7952, 6.9237, 6.8439, 6.7648, 6.8922,\n 7.0187, 6.9402, 7.0658, 7.1904, 7.3143, 7.2363, 7.3592, 7.4813, 7.6026,\n 7.7232, 7.8429, 7.7653, 7.8842, 7.8072, 7.9253, 7.8489, 7.7732, 7.6980,\n 7.8153, 7.7407, 7.8571, 7.7831, 7.7096, 7.6368, 7.7524, 7.6800, 7.6082,\n 7.7230, 7.8372, 7.7658, 7.6950, 7.6246, 7.5548, 7.6681, 7.5988, 7.5299,\n 7.6424, 7.7544, 7.6859, 7.7971, 7.9078, 8.0178, 7.9497, 8.0591, 8.1679,\n 8.2762, 8.3840, 8.4911, 8.4232, 8.5298, 8.4623, 8.5683, 8.5012, 8.4345,\n 8.3683, 8.4736, 8.4078, 8.5126, 8.4471, 8.3820, 8.3173, 8.4215, 8.3572,\n 8.2933, 8.2298, 8.3333, 8.2702, 8.3732, 8.4757, 8.5778, 8.5148, 8.4523,\n 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Amazon shares fell nearly 4 percent following the results as the company said operating income would drop as much as 42 percent in the second quarter.\nHypothesis: Shares of Amazon fell 4 percent.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "17", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-0.7", + "p value": "0.758", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.2205, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.9620, 3.8297, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.5466, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.7132,\n 6.6075, 6.7583, 6.6541, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.1243, 7.2650, 7.4044, 7.3068,\n 7.4449, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.7937, 7.9259,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.5133, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.9124, 10.0261, 9.9373, 9.8494, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.4537, 10.5621, 10.6700, 10.5848,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.7367, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.0940, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.7217, 11.6412, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.9558, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.2214, 12.1447, 12.2397, 12.3342, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.8007, 12.7248, 12.8169, 12.9087, 12.8333,\n 12.9247, 13.0157, 12.9410, 13.0316, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Hodler claimed there were also irregularities in the campaigns organized by Atlanta for the 1996 Summer Games, Sydney for the Summer Olympics in 2000 and Salt Lake City for the 2002 Winter Games.\nHypothesis: Before Salt Lake City, Winter Olympic Games took place in Nagano.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.2716, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, 0.1382, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.2971, 0.2540, 0.2111, 0.3369, 0.2940, 0.4189, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.7155,\n 5.9297, 6.1389, 5.9530, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.2993, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.1261, 6.9903, 6.8573, 6.7269, 6.8924, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.0387, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.6823, 7.8296, 7.9754, 8.1196, 8.0042,\n 7.8905, 8.0335, 8.1750, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.5337, 8.6678, 8.5612, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.0067, 9.1343, 9.2609, 9.3865, 9.2847,\n 9.1840, 9.3088, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.1391, 10.2514, 10.3630, 10.4738, 10.3853, 10.4956,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.0102, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.6029, 11.7031, 11.6219, 11.5414, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.5049, 12.5986,\n 12.6918, 12.6153, 12.5394, 12.6323, 12.5568, 12.4818, 12.5745, 12.6667,\n 12.7585, 12.8499, 12.9410, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A compound in breast milk has been found to destroy many skin warts, raising hopes it also might prove effective against cervical cancer and other lethal diseases caused by the same virus.\nHypothesis: Breast milk may help fight cervical cancer.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, 0.1659, 0.3303, 0.4932, 0.6547,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 3.6566,\n 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641, 3.2206, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998, 3.5796, 3.8497, 3.6667,\n 3.4915, 3.3235, 3.5839, 3.4219, 3.2660, 3.5176, 3.3665, 3.2205, 3.4641,\n 3.3221, 3.1844, 3.0509, 3.2863, 3.5165, 3.7417, 3.6098, 3.8297, 3.7009,\n 3.9158, 3.7897, 4.0000, 3.8765, 4.0825, 3.9614, 3.8431, 4.0446, 4.2426,\n 4.1260, 4.0119, 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.8394, 5.0190,\n 4.9075, 5.0844, 5.2590, 5.1490, 5.3211, 5.2129, 5.1065, 5.2760, 5.1711,\n 5.0680, 5.2350, 5.1333, 5.0332, 4.9346, 5.0990, 5.2615, 5.4222, 5.3245,\n 5.4832, 5.6401, 5.5435, 5.6986, 5.6032, 5.7566, 5.6622, 5.5691, 5.4772,\n 5.6286, 5.5377, 5.4480, 5.5976, 5.7458, 5.6569, 5.5690, 5.7155, 5.8606,\n 6.0044, 5.9172, 6.0596, 6.2008, 6.3408, 6.2541, 6.3928, 6.5303, 6.4444,\n 6.5807, 6.4957, 6.4116, 6.5465, 6.4632, 6.3807, 6.5144, 6.4327, 6.3517,\n 6.2716, 6.4040, 6.5354, 6.6658, 6.5861, 6.7155, 6.8439, 6.7648, 6.8922,\n 6.8138, 6.9402, 6.8624, 6.7854, 6.7090, 6.8343, 6.7585, 6.6833, 6.8076,\n 6.9310, 6.8564, 6.7823, 6.9048, 7.0265, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.5933, 7.5204, 7.6368, 7.5644, 7.4927, 7.6082,\n 7.5369, 7.4662, 7.5809, 7.5106, 7.4409, 7.3717, 7.4855, 7.5988, 7.7114,\n 7.6424, 7.7544, 7.8657, 7.7971, 7.9078, 7.8397, 7.9497, 7.8820, 7.8147,\n 7.7480, 7.8572, 7.7908, 7.7249, 7.8335, 7.9415, 7.8759, 7.8107, 7.9181,\n 8.0249, 8.1312, 8.0663, 8.1721, 8.2773, 8.3820, 8.3173, 8.4215, 8.5252,\n 8.4608, 8.5640, 8.5000, 8.4364, 8.5390, 8.4757, 8.4128, 8.5148, 8.4523,\n 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The plan was released by Mr Dean on behalf of the Secretary of Health and Human Services, Tommy Thompson, still recovering from a recent accident, at a Secretarial Summit on Health Information Technology that was attended by many of the nation's leaders in electronic health records.\nHypothesis: Mr Dean is the Secretary of Health and Human Services.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.3%", + "z-score": "0.0821", + "p value": "0.467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.2222, -0.8729, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, 0.1601, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.5143, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.5507, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.0436, 0.1741,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.2540, 0.3800, 0.3369, 0.2940, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.9599, 2.2011, 2.4371, 2.3238, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.6679, 2.5621, 2.7757, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.8808, 3.0817, 2.9814, 2.8830, 3.0796, 3.2733, 3.4641,\n 3.6522, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.9056, 4.0825,\n 4.2571, 4.1612, 4.3333, 4.5034, 4.4083, 4.3146, 4.2222, 4.1312,\n 4.2981, 4.2080, 4.1192, 4.0316, 4.1957, 4.3580, 4.2710, 4.1851,\n 4.1003, 4.2601, 4.4182, 4.3339, 4.4901, 4.6448, 4.7980, 4.9497,\n 4.8655, 5.0156, 4.9322, 4.8497, 4.9980, 5.1450, 5.2906, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.3333, 5.2535, 5.1745, 5.3156, 5.2372,\n 5.1597, 5.0829, 5.2223, 5.3606, 5.2842, 5.2086, 5.1338, 5.2705,\n 5.4061, 5.3316, 5.4661, 5.5995, 5.7320, 5.8635, 5.7892, 5.9196,\n 5.8458, 5.7726, 5.9019, 6.0302, 6.1577, 6.0848, 6.2113, 6.3369,\n 6.2644, 6.1926, 6.1213, 6.0506, 6.1750, 6.1047, 6.0351, 5.9660,\n 6.0892, 6.2116, 6.1429, 6.0746, 6.0069, 6.1283, 6.2489, 6.1815,\n 6.3013, 6.4203, 6.5387, 6.6564, 6.5891, 6.7060, 6.6391, 6.5727,\n 6.6887, 6.8041, 6.9189, 6.8527, 6.9667, 7.0801, 7.0142, 6.9488,\n 6.8838, 6.8192, 6.9317, 6.8675, 6.8037, 6.7404, 6.8520, 6.9631,\n 6.9000, 6.8373, 6.7751, 6.8853, 6.9950, 6.9330, 7.0420, 7.1506,\n 7.2585, 7.3660, 7.3041, 7.4109, 7.3493, 7.2881, 7.3943, 7.5000,\n 7.6052, 7.5441, 7.6488, 7.7530, 7.6922, 7.6317, 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Alice Cooper, a founder of the shock rock genre, and infamous for his gory stage shows, is setting up a Christian center for at-risk youths in Phoenix. Cooper, who has been a born again Christian for over 20 years, has already raised US$2 million for the center via his charity, the Solid Rock Foundation, founded by Cooper in 1995. \"The Rock\", as the center will be called, is expected to cost $7.3 million, and Cooper hopes that construction work will begin on the site, currently a grassy area near the Grand Canyon University, by November.\nHypothesis: Alice Cooper is a Christian.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.2962, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.8889, 1.0507, 1.2111, 1.1508, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.2276, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.7192, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.9867, 0.9366,\n 0.8868, 1.0235, 0.9739, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.8333,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 5.1962,\n 5.4611, 5.7155, 5.9604, 6.1968, 6.4254, 6.6469, 6.3805, 6.1283,\n 6.3509, 6.1143, 5.8889, 6.1101, 5.8966, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 7.3030,\n 7.4839, 7.3051, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.1742, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.4017, 8.2711, 8.4188, 8.2908, 8.1650,\n 8.0413, 8.1881, 8.0667, 7.9472, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.7757, 8.9045, 9.0323, 9.1590, 9.2847,\n 9.1840, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 9.8494, 9.7622, 9.6758, 9.7890, 9.7034,\n 9.6186, 9.7312, 9.6471, 9.7590, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.9249, 10.0342, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.3024,\n 10.2220, 10.1423, 10.2486, 10.1695, 10.2753, 10.3805, 10.4852, 10.4067,\n 10.5109, 10.4330, 10.5366, 10.6397, 10.7423, 10.6650, 10.7671, 10.6904,\n 10.7920, 10.7159, 10.8170, 10.7415, 10.8421, 10.9422, 10.8673, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.0165, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.3572, 11.2848, 11.3809, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.8769, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Pibul Songgram was the pro-Japanese military dictator of Thailand during World War 2.\nHypothesis: Pibul was the dictator of Thailand.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "194", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.8%", + "z-score": "0.249", + "p value": "0.402", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, -0.2335, -0.3086,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -0.8889, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.4988, -0.3478, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.1429, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.0919, -0.1374, 0.0000,\n 0.1365, 0.0907, 0.2261, 0.1803, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.1761, 0.3073, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.2513, 0.3760, 0.3333,\n 0.2909, 0.2487])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.5396, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.0889, 2.3238, 2.5538, 2.7791, 3.0000,\n 3.2167, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.2796, 3.4743, 3.3729, 3.5642, 3.4641,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.5447, 3.7264, 3.9056, 4.0825,\n 4.2571, 4.4296, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.3867, 5.2915, 5.4482, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.3333, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.3320, 9.2463, 9.3617, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.4513, 9.3686, 9.2867, 9.3993, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.7908, 9.7109, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 11.1173, 11.2164, 11.3150,\n 11.4132, 11.5109, 11.6082, 11.7050, 11.8014, 11.7261, 11.8221, 11.7473,\n 11.8429, 11.9380, 12.0327, 11.9586, 12.0529, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: As spacecraft commander for Apollo XI, the first manned lunar landing mission, Armstrong was the first man to walk on the Moon. \"That's one small step for a man, one giant leap for mankind.\" With these historic words, man's dream of the ages was fulfilled.\nHypothesis: Neil Armstrong was the first man who landed on the Moon.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.3696, -0.1466, -0.2182, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.0444, -0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.0434, 0.0865, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.5260, 4.7336, 4.5968, 4.4634, 4.3333,\n 4.2064, 4.4091, 4.2848, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.0844, 4.9747, 4.8669, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.2350, 5.1333, 5.0332, 4.9346, 5.0990, 5.2615, 5.1640,\n 5.0679, 4.9731, 4.8797, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.2338, 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.7140,\n 4.8655, 5.0156, 4.9322, 5.0807, 5.2278, 5.1450, 5.0630, 5.2085,\n 5.3526, 5.4956, 5.4140, 5.5556, 5.6959, 5.8351, 5.9732, 6.1101,\n 6.2459, 6.3807, 6.2991, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.6658, 6.5861, 6.7155, 6.8439, 6.7648, 6.6865, 6.8138, 6.7361,\n 6.8624, 6.7854, 6.9107, 7.0353, 7.1590, 7.0823, 7.0063, 6.9310,\n 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 6.8799, 6.8075, 6.9282,\n 7.0481, 7.1673, 7.0952, 7.0238, 6.9529, 6.8825, 7.0006, 7.1181,\n 7.0481, 6.9786, 7.0952, 7.0262, 6.9577, 6.8897, 7.0054, 7.1204,\n 7.2348, 7.1670, 7.0998, 7.0330, 6.9667, 7.0801, 7.1929, 7.1270,\n 7.0614, 7.1735, 7.1083, 7.0436, 6.9793, 7.0905, 7.2012, 7.3113,\n 7.2472, 7.1835, 7.1203, 7.0574, 6.9950, 6.9330, 6.8713, 6.9803,\n 7.0888, 7.0273, 7.1352, 7.2425, 7.1813, 7.1205, 7.2272, 7.3333,\n 7.4390, 7.3783, 7.4834, 7.5880, 7.6922, 7.7958, 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Weber worked for WABC for 12 years, appearing on such shows as Curtis and Kuby, giving news updates for listeners at the top and bottom of every hour. After he was laid off by the station last year due to a change in programing, he was working as a freelance reporter for ABC News Radio, a national network. In his career before WABC, he worked for KTLK and KMPC, located in Los Angeles, California and KGO in San Fransisco, California.\nHypothesis: KTLK is located in San Francisco.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "35.7%", + "z-score": "3.48", + "p value": "0.000252", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.8889, 1.7457, 2.0370, 1.8974, 2.1776, 2.0412,\n 2.3116, 2.1783, 2.0494, 1.9245, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.2743, 2.4804, 2.3851, 2.5873, 2.4930, 2.6914, 2.8868,\n 2.7928, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.7333, 2.6491, 2.8292, 2.7456, 2.6632, 2.5820,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.9433, 3.1129, 3.0330, 2.9542,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.8138, 2.7393, 2.9025, 2.8284,\n 2.9897, 3.1493, 3.0754, 3.0022, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.8735, 2.8039, 2.9582, 2.8889, 3.0415, 3.1928, 3.1236, 3.0551,\n 2.9872, 2.9200, 2.8534, 2.7875, 2.9357, 2.8701, 3.0168, 2.9515,\n 3.0967, 3.0317, 3.1755, 3.1109, 3.0467, 2.9832, 3.1251, 3.0619,\n 3.2025, 3.1395, 3.2788, 3.4171, 3.3542, 3.2918, 3.2299, 3.1685,\n 3.1076, 3.0471, 3.1831, 3.1229, 3.2577, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.3447, 3.2857, 3.2271, 3.1690, 3.1113, 3.2419, 3.1844,\n 3.3140, 3.2567, 3.3853, 3.5131, 3.4558, 3.3989, 3.3424, 3.2863,\n 3.2306, 3.1753, 3.3012, 3.2460, 3.3710, 3.3160, 3.4401, 3.5635,\n 3.5085, 3.4539, 3.3996, 3.3457, 3.2921, 3.2389, 3.3606, 3.3075,\n 3.4283, 3.3754, 3.4954, 3.6148, 3.5619, 3.5093, 3.4570, 3.4050,\n 3.3534, 3.3020, 3.4198, 3.3686, 3.4857, 3.4346, 3.5509, 3.6667,\n 3.6156, 3.5648, 3.5143, 3.4641, 3.4142, 3.3645, 3.4788])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "155", + "Fraction of T in Greenlist": "77.9%", + "z-score": "17.2", + "p value": "7.86e-67", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.3113, 7.4878, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.3881, 9.5321, 9.3834, 9.5263,\n 9.6676, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.3423,\n 10.4739, 10.6043, 10.7333, 10.8612, 10.9878, 11.1132, 10.9777, 10.8444,\n 10.9697, 10.8388, 10.7099, 10.8350, 10.9589, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.5570, 11.6743, 11.7907, 11.6693, 11.7851,\n 11.9001, 12.0142, 12.1274, 12.2398, 12.3514, 12.4622, 12.5723, 12.4550,\n 12.3393, 12.4491, 12.5583, 12.6667, 12.7743, 12.6611, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.9750, 13.0798, 12.9704, 13.0748, 13.1785, 13.2816,\n 13.3840, 13.4859, 13.5871, 13.6878, 13.7878, 13.6816, 13.7813, 13.8804,\n 13.7757, 13.8745, 13.7710, 13.8695, 13.9675, 13.8654, 13.9630, 14.0601,\n 14.1567, 14.2527, 14.1524, 14.2481, 14.3434, 14.4381, 14.3393, 14.4338,\n 14.5277, 14.4301, 14.3333, 14.2374, 14.3314, 14.4250, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.9769, 14.8831, 14.9737, 15.0639, 15.1537,\n 15.2430, 15.3320, 15.2397, 15.3284, 15.4167, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.4983, 15.5853, 15.6720, 15.7584, 15.8443, 15.9299, 16.0151,\n 16.1000, 16.1846, 16.2688, 16.3526, 16.4361, 16.3481, 16.4314, 16.5144,\n 16.4272, 16.5100, 16.5925, 16.6746, 16.7564, 16.8379, 16.9191, 17.0000,\n 17.0806, 17.1609, 17.2408, 17.3205, 17.3999, 17.3149, 17.2304])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Russian cosmonaut Valery Polyakov set the record for the longest continuous amount of time spent in space, a staggering 438 days, between 1994 and 1995.\nHypothesis: Russians hold record for longest stay in space.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.9215, 1.8240, 1.7285, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.6131, 1.8185, 2.0207,\n 1.9335, 2.1320, 2.0455, 1.9604, 1.8766, 2.0702, 2.2611, 2.1773,\n 2.3651, 2.5504, 2.4667, 2.6491, 2.5660, 2.4841, 2.4034, 2.3238,\n 2.5019, 2.6778, 2.5983, 2.7717, 2.9433, 2.8638, 2.7854, 2.9542,\n 3.1211, 3.2863, 3.2077, 3.1300, 3.0533, 3.2157, 3.1394, 3.0641,\n 2.9897, 2.9161, 2.8433, 3.0022, 3.1597, 3.0870, 3.0151, 2.9439,\n 2.8735, 3.0282, 2.9582, 2.8889, 2.8203, 2.9726, 3.1236, 3.0551,\n 2.9872, 2.9200, 2.8534, 2.7875, 2.7222, 2.6575, 2.8051, 2.7406,\n 2.6768, 2.6135, 2.5508, 2.4887, 2.4271, 2.5717, 2.5103, 2.4495,\n 2.3891, 2.5318, 2.4717, 2.6131, 2.5532, 2.4938, 2.4348, 2.5744,\n 2.7129, 2.6540, 2.7913, 2.9277, 2.8687, 3.0039, 2.9451, 2.8868,\n 2.8288, 2.7713, 2.9048, 3.0373, 2.9798, 3.1113, 3.2419, 3.1844,\n 3.1273, 3.2567, 3.3853, 3.5131, 3.4558, 3.3989, 3.3424, 3.4689,\n 3.4126, 3.3567, 3.3012, 3.2460, 3.1912, 3.3160, 3.4401, 3.5635,\n 3.5085, 3.6310, 3.5762, 3.5218, 3.4677, 3.5890, 3.7097, 3.6556,\n 3.7755, 3.8947, 3.8406, 3.9590, 3.9052, 3.8516, 3.7984, 3.7455,\n 3.8627, 3.9793, 3.9265, 4.0423, 4.1576, 4.1048, 4.0522, 4.1667,\n 4.2805, 4.3938, 4.3412, 4.2889, 4.2369, 4.3492, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.7155,\n 5.5277, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.7373, 7.8779,\n 7.7723, 7.6681, 7.8074, 7.7047, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.0000, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.3625, 8.4868, 8.6102, 8.5210, 8.6436,\n 8.7652, 8.6770, 8.7978, 8.9178, 9.0370, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.3320, 9.2463, 9.3617, 9.2768, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.2867, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.2697, 9.3810, 9.4916, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.5840, 9.6921, 9.7997, 9.9067, 9.8293, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.5998, 10.6990, 10.6271, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.9829, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The harvest of sea-weeds is not allowed in the Puget Sound because of marine vegetation's vital role in providing habitat to important species.\nHypothesis: Marine vegetation is harvested.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "57", + "# Tokens in Greenlist": "17", + "Fraction of T in Greenlist": "29.8%", + "z-score": "0.841", + "p value": "0.2", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 1.2372, 1.1431, 1.0510, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "136", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "67.6%", + "z-score": "11.5", + "p value": "7.78e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.0212, 5.7155, 5.4306, 5.6804, 5.9214, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.9297, 6.1389, 5.9530, 6.1584, 6.3594, 6.5561, 6.7489, 6.9378,\n 6.7625, 6.5924, 6.7795, 6.9631, 6.7992, 6.9803, 7.1583, 7.0000,\n 6.8457, 7.0219, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.5707, 7.7326, 7.8923, 7.7517, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.6883, 7.8416, 7.9931, 8.1428, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.5743, 9.6995, 9.8237, 9.9469, 9.8416, 9.7376,\n 9.8601, 9.7574, 9.8792, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.2740, 10.3908, 10.5067, 10.6218, 10.5236, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.9917, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.5645, 11.4714, 11.3791, 11.4857])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: He became a boxing referee in 1964 and became most well-known for his decision against Mike Tyson, during the Holyfield fight, when Tyson bit Holyfield's ear.\nHypothesis: Mike Tyson bit Holyfield's ear in 1964.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -0.9649, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, -0.1674, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.2449, 0.3904, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.3225, 0.2756, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.6367, 0.5927, 0.5489, 0.6737, 0.6299, 0.5864, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.5375, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.3970, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.0844, 4.9747, 4.8669, 4.7610, 4.9348, 4.8305, 4.7278, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.6986, 5.6032, 5.5090, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.7785, 5.6875, 5.8358, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.2883, 6.2008, 6.1143,\n 6.0288, 6.1685, 6.3070, 6.4444, 6.3595, 6.4957, 6.4116, 6.5465,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.7788, 6.9094, 7.0391, 6.9570,\n 6.8757, 6.7952, 6.9237, 7.0513, 7.1779, 7.0980, 7.2236, 7.1443,\n 7.2691, 7.3930, 7.3143, 7.2363, 7.3592, 7.2818, 7.4039, 7.5251,\n 7.4483, 7.5687, 7.6883, 7.8072, 7.7308, 7.8489, 7.9663, 7.8905,\n 7.8153, 7.7407, 7.8571, 7.7831, 7.7096, 7.6368, 7.5644, 7.6800,\n 7.7949, 7.7230, 7.6517, 7.7658, 7.8793, 7.9921, 7.9211, 7.8507,\n 7.7808, 7.8928, 8.0042, 8.1150, 8.0455, 8.1556, 8.0865, 8.1960,\n 8.3050, 8.2362, 8.3446, 8.2762, 8.3840, 8.4911, 8.5978, 8.7039,\n 8.6359, 8.7414, 8.6738, 8.7788, 8.8832, 8.9872, 8.9199, 9.0233,\n 9.1262, 9.2287, 9.3306, 9.2637, 9.1971, 9.2986, 9.3995, 9.5000,\n 9.6000, 9.6996, 9.7987, 9.7325, 9.6666, 9.7653, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The team drawing up Iraq's new constitution considered giving itself more time to write the document on Sunday, but still looked set to meet its mid-August deadline under intense U.S. pressure.\nHypothesis: On Sunday, officials argued about whether to seek a delay of the August deadline for completing the document in order to give them more time to hash out such sticky issues.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 2.0889, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.4101, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.0806, 3.2667, 3.1743, 3.0833, 3.2660,\n 3.4463, 3.3558, 3.2667, 3.1789, 3.0924, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.4423, 2.3658, 2.2902, 2.4618,\n 2.3868, 2.3126, 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.9149, 1.8475, 1.7809, 1.9437, 1.8773, 2.0381,\n 2.1974, 2.3552, 2.2884, 2.2222, 2.1567, 2.3120, 2.2468, 2.4004,\n 2.3354, 2.2711, 2.2074, 2.3586, 2.2952, 2.2323, 2.1700, 2.1082,\n 2.0470, 2.1954, 2.1344, 2.0739, 2.2205, 2.1602, 2.3054, 2.4495,\n 2.3891, 2.3293, 2.2699, 2.4121, 2.3529, 2.2943, 2.2361, 2.1783,\n 2.1210, 2.2608, 2.2037, 2.1470, 2.0907, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 2.0605,\n 2.0071, 1.9540, 1.9013, 1.8490, 1.9813, 1.9291, 2.0604, 2.0083,\n 1.9566, 2.0866, 2.2159, 2.3443, 2.2923, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.5166, 2.4653, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.2132, 2.1637, 2.1145, 2.0656, 2.0170, 1.9686, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.8058, 1.9267, 1.8799, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 6.7893,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.6192, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.4425, 9.5751, 9.7065, 9.5876, 9.7181, 9.6011,\n 9.7306, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.8477, 10.9669, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.3276, 11.4420, 11.5556, 11.6683, 11.7803, 11.8915, 12.0020,\n 12.1117, 12.2207, 12.3289, 12.2221, 12.3299, 12.4370, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.8582, 12.9616, 13.0644, 13.1665, 13.2681,\n 13.3690, 13.4694, 13.5693, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.7621, 13.6630, 13.5647, 13.6626, 13.5654, 13.6629, 13.7599, 13.8564,\n 13.7606, 13.8567, 13.9524, 14.0475, 14.1422, 14.2364, 14.3302, 14.2361,\n 14.3295, 14.2364, 14.3295, 14.2373, 14.1458, 14.2388, 14.3313, 14.4234,\n 14.5150, 14.6062, 14.6970, 14.6070, 14.5178, 14.6084, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.7023, 14.7916, 14.7049, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.7998, 14.7152, 14.8034, 14.7195, 14.8074, 14.7242, 14.6416,\n 14.7293, 14.8167, 14.9037, 14.9903, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.5060, 15.5900, 15.6736, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: He also referred to the \"illegal\" arrest on 31 May of Mexican Professor Maria Eugenia Ochoa Garcia, whom the Salvadoran government accused of having connections with the Salvadoran guerrillas.\nHypothesis: Professor Ochoa Garcia is a member of the Salvadoran government.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.9169, 0.8402, 0.7646, 0.9661, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.4000, 1.5894, 1.7765, 1.9612, 2.1436, 2.0656,\n 1.9887, 2.1678, 2.3448, 2.5198, 2.6928, 2.8638, 2.7854, 2.7080,\n 2.6316, 2.5560, 2.7235, 2.6485, 2.8138, 2.9775, 3.1394, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.0022, 2.9299, 3.0870, 3.2426, 3.3968,\n 3.5496, 3.7011, 3.6279, 3.5556, 3.7051, 3.6332, 3.5620, 3.7097,\n 3.8562, 4.0015, 4.1455, 4.2885, 4.2167, 4.1457, 4.0753, 4.0056,\n 3.9365, 4.0771, 4.2167, 4.3552, 4.4927, 4.6291, 4.5596, 4.4907,\n 4.6258, 4.5573, 4.6912, 4.6232, 4.5557, 4.4888, 4.6212, 4.5547,\n 4.4887, 4.4233, 4.3583, 4.4891, 4.6191, 4.5543, 4.4901, 4.6188,\n 4.5549, 4.4915, 4.6190, 4.5560, 4.4933, 4.6198, 4.5575, 4.4956,\n 4.4342, 4.3733, 4.3128, 4.4376, 4.3774, 4.3176, 4.4413, 4.5644,\n 4.5047, 4.6268, 4.7483, 4.8690, 4.8093, 4.7500, 4.8698, 4.8107,\n 4.7520, 4.8709, 4.9891, 4.9305, 4.8724, 4.9896, 5.1063, 5.0483,\n 5.1642, 5.2795, 5.3941, 5.5082, 5.4501, 5.5635, 5.6763, 5.7885,\n 5.9002, 5.8420, 5.7841, 5.8951, 5.8375, 5.9477, 5.8904, 5.8333,\n 5.7766, 5.8861, 5.8296, 5.7735, 5.7177, 5.8263, 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712, 3.5796, 3.3968, 3.6667,\n 3.4915, 3.3235, 3.1623, 3.0072, 2.8577, 2.7136, 2.9704, 3.2205, 3.4641,\n 3.3221, 3.1844, 3.4207, 3.6515, 3.5165, 3.7417, 3.9620, 3.8297, 3.7009,\n 3.5753, 3.4528, 3.3333, 3.2167, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998,\n 3.5032, 3.3947, 3.2883, 3.4873, 3.6831, 3.5777, 3.7700, 3.9595, 3.8552,\n 3.7528, 3.9386, 3.8376, 3.7383, 3.6407, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.4463, 3.6242, 3.8000, 3.7087, 3.6187, 3.7916, 3.9624, 3.8730, 4.0415,\n 4.2080, 4.1192, 4.0316, 4.1957, 4.1090, 4.0234, 3.9389, 3.8555, 3.7732,\n 3.6919, 3.6116, 3.7717, 3.9302, 4.0872, 4.0069, 3.9276, 4.0825, 4.2359,\n 4.1569, 4.3086, 4.4590, 4.3804, 4.3027, 4.4511, 4.3740, 4.2977, 4.2222,\n 4.1475, 4.0736, 4.0004, 3.9279, 4.0734, 4.2178, 4.3609, 4.2885, 4.2167,\n 4.3583, 4.4987, 4.4272, 4.5663, 4.7044, 4.6332, 4.5626, 4.6992, 4.6291,\n 4.5596, 4.4907, 4.4225, 4.3548, 4.2877, 4.2212, 4.3554, 4.4888, 4.6212,\n 4.7527, 4.8833, 5.0130, 5.1419, 5.2699, 5.3970, 5.3295, 5.2626, 5.1962,\n 5.1303, 5.2560, 5.3810, 5.5051, 5.6285, 5.7511, 5.8730, 5.8068, 5.9279,\n 6.0481, 5.9822, 6.1017, 6.2205, 6.1548, 6.2728, 6.3901, 6.5067, 6.6227,\n 6.5571, 6.6724, 6.7869, 6.9009, 7.0142, 7.1270, 7.2391, 7.1735, 7.2849,\n 7.3958, 7.3305, 7.2656, 7.3758, 7.4853, 7.4208, 7.5297, 7.6381, 7.5738,\n 7.6816, 7.7889, 7.8956, 7.8316, 7.7679, 7.8740, 7.9796, 8.0847, 8.1892,\n 8.1258, 8.0627, 8.1667, 8.2702, 8.2074, 8.3103, 8.4128, 8.3503, 8.2882,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Mount Redoubt, a volcano in Alaska, erupted on Saturday, sending out clouds of ash thousands of feet high. According to the Federal Aviation Administration, the Ted Stevens Anchorage International Airport was shut down after ash reached the airport. Jeremy Lindseth, a spokesman for the airport, said that only small amount of ash reached the airport, but was significant enough to disrupt operations, as ash can cause engine problems for aircraft. He said that he was uncertain of how many flights were affected due to the eruption.\nHypothesis: The Ted Stevens Anchorage International Airport is the employer of Jeremy Lindseth.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, 0.1045, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.4778, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.7001, 0.6513, 0.6029, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.4377, 0.3928, 0.3482,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.2909, 0.2487, 0.3721, 0.3299, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.1170,\n 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547, 0.9802, 0.8165,\n 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714, 0.3464, 0.6794, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.5185, 0.4082, 0.3015, 0.1980, 0.0976, 0.0000,\n 0.2847, 0.5620, 0.4623, 0.3651, 0.2705, 0.1782, 0.4402, 0.3482, 0.2582,\n 0.5108, 0.7579, 0.6667, 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.4142,\n 1.3234, 1.2344, 1.1471, 1.3646, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185,\n 2.0207, 1.9335, 1.8477, 1.7634, 1.9604, 1.8766, 2.0702, 1.9870, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 2.0656, 2.2453,\n 2.1678, 2.3448, 2.5198, 2.6928, 2.8638, 3.0330, 3.2004, 3.3659, 3.5298,\n 3.6919, 3.8523, 4.0112, 3.9302, 4.0872, 4.2426, 4.1621, 4.3158, 4.4680,\n 4.6188, 4.5384, 4.6876, 4.6079, 4.5291, 4.6765, 4.8226, 4.7442, 4.8889,\n 5.0323, 4.9543, 5.0964, 5.2372, 5.3769, 5.5155, 5.6530, 5.7894, 5.9247,\n 5.8464, 5.7689, 5.9029, 5.8260, 5.9589, 5.8825, 6.0143, 5.9386, 5.8635,\n 5.7892, 5.9196, 6.0491, 5.9752, 6.1036, 6.0302, 6.1577, 6.0848, 6.0125,\n 5.9409, 6.0671, 6.1926, 6.3172, 6.4409, 6.5639, 6.6861, 6.8075, 6.9282,\n 7.0481, 6.9762, 7.0952, 7.2136, 7.3312, 7.2596, 7.3765, 7.4927, 7.4215,\n 7.5369, 7.6517, 7.7658, 7.6950, 7.8084, 7.7380, 7.6681, 7.7808, 7.8928,\n 7.8233, 7.9347, 8.0455, 7.9764, 8.0865, 8.1960, 8.3050, 8.4133, 8.5212,\n 8.6284, 8.7351, 8.6662, 8.5978, 8.7039, 8.6359, 8.7414, 8.6738, 8.7788,\n 8.7116, 8.6448, 8.5785, 8.6828, 8.7867, 8.7207, 8.8240, 8.9268, 8.8612,\n 8.9635, 8.8982, 8.8333, 8.7689, 8.8706, 8.8065, 8.7427, 8.8439, 8.9446,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: FMLN reports to our people, and to the people of the world, that the massacre against the Salvadoran Workers National Union Federation [Fenastras] was carried out by Colonel Elena Fuente, as the head of the morbid death squad of the army's 1st Infantry Brigade, in response to our military attack on the army staff.\nHypothesis: Fenastras was attacked by FMLN.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.9258,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 1.1202, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.8980, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 1.0284, 0.9759, 0.9238, 1.0659, 1.2070, 1.1547,\n 1.1028, 1.2423, 1.1905, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 1.0565, 1.1852, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.2049, 1.1587, 1.1127, 1.2377, 1.1918,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.2435, 1.1990, 1.1547, 1.1106, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 2.8098, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 3.1334, 3.0123, 2.8943, 2.7791, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 3.7273, 3.6141, 3.5032, 3.7033,\n 3.9001, 4.0937, 3.9837, 4.1740, 4.0657, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.7610, 4.6568, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 4.9346, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.4482, 5.6032, 5.7566, 5.9084,\n 5.8139, 5.9641, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.4153, 6.3248, 6.4663, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.7049, 6.6171, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.4625, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.8808, 8.0042, 7.9209, 7.8384, 7.9608,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.4348, 8.5516, 8.6677, 8.5879, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.2450, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.4188, 9.3443, 9.4513, 9.5577, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 10.0668, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.3566, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.5128, 10.6111, 10.5410, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.9564, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: IBM stole trade secrets to copy two of its programs -- File-AID, a file manager, and Abend-AID, a program that helps users locate the source of glitches.\nHypothesis: Trade secrets were stolen.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "89", + "Fraction of T in Greenlist": "44.7%", + "z-score": "6.43", + "p value": "6.57e-11", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 3.0290, 2.9055, 2.7852, 2.6681, 2.8943, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.3147, 3.2026, 3.0929, 3.2998, 3.1918, 3.0861,\n 3.2883, 3.1840, 3.0817, 3.2796, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.2691, 3.4562, 3.6407, 3.5447, 3.4503, 3.6315, 3.8103,\n 3.7166, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 3.9624, 4.1312,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.1957, 4.1090, 4.0234, 4.1851,\n 4.3451, 4.2601, 4.4182, 4.5747, 4.4901, 4.4066, 4.3241, 4.4783,\n 4.3966, 4.3158, 4.4680, 4.3879, 4.3086, 4.4590, 4.3804, 4.5291,\n 4.6765, 4.5983, 4.5210, 4.4444, 4.5899, 4.7341, 4.6580, 4.5826,\n 4.7252, 4.8666, 4.7916, 4.9317, 4.8572, 4.9960, 4.9221, 5.0596,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.1123, 5.2463, 5.1744, 5.1031,\n 5.0325, 5.1650, 5.2965, 5.2262, 5.3567, 5.4863, 5.4163, 5.3468,\n 5.2779, 5.4062, 5.3378, 5.2699, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.3220, 5.4471, 5.5714, 5.5051, 5.4393, 5.3740, 5.4971, 5.6195,\n 5.5544, 5.4899, 5.6112, 5.7319, 5.6675, 5.7874, 5.7234, 5.8424,\n 5.7787, 5.8969, 5.8336, 5.9510, 6.0678, 6.0047, 5.9420, 6.0579,\n 5.9956, 5.9336, 5.8721, 5.9871, 6.1014, 6.0401, 6.1537, 6.2668,\n 6.2057, 6.1449, 6.0846, 6.1968, 6.1367, 6.0770, 6.1884, 6.1290,\n 6.0700, 6.1807, 6.1219, 6.2319, 6.3414, 6.2828, 6.2246, 6.1667,\n 6.2753, 6.3835, 6.3258, 6.2684, 6.3758, 6.4828, 6.4256])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.84", + "p value": "4.62e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.3890, 6.2610, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.8458, 6.7333, 6.6227, 6.5137, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.1996, 6.1012, 6.2517, 6.1546,\n 6.3035, 6.2075, 6.3549, 6.2601, 6.1664, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.2222, 6.3595, 6.2755, 6.1924, 6.1101,\n 6.0287, 5.9481, 6.0837, 6.0038, 6.1382, 6.0590, 6.1923, 6.1137,\n 6.0359, 5.9589, 5.8825, 6.0143, 6.1451, 6.0693, 5.9941, 6.1237,\n 6.2524, 6.1777, 6.3054, 6.4322, 6.3580, 6.2843, 6.4101, 6.5350,\n 6.6591, 6.7823, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.3584, 7.4762, 7.4034, 7.5204, 7.4482, 7.5644, 7.4927,\n 7.4215, 7.3508, 7.2807, 7.2111, 7.1420, 7.2572, 7.1885, 7.3030,\n 7.2348, 7.3485, 7.4616, 7.3937, 7.3263, 7.4386, 7.3717, 7.4833,\n 7.4167, 7.5277, 7.6381, 7.7480, 7.8572, 7.9659, 8.0741, 8.1817,\n 8.2887, 8.3952, 8.3286, 8.2624, 8.3683, 8.4736, 8.4078, 8.3423,\n 8.2773, 8.2127, 8.1485, 8.2531, 8.1892, 8.2933, 8.3969, 8.5000,\n 8.6026, 8.7048, 8.8065, 8.9077, 8.8439])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Nokia, Texas Instruments and other leading makers of mobile phones have formally complained to Brussels that Qualcomm, the US mobile chipmaker, has unfairly used its patents on 3G technologies.\nHypothesis: Texas Instruments produces mobile phones.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.6081, -1.6521, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.0812, 6.9824, 7.1243, 7.0268, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.0211, 7.1591, 7.2960, 7.2029, 7.3386, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.5556, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.9178, 8.8304, 8.7439, 8.8631, 8.7773,\n 8.6924, 8.8108, 8.7267, 8.6433, 8.7610, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.0117, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.3810, 9.3017, 9.4124, 9.3338, 9.2559, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 9.8887, 9.8131, 9.9184, 9.8433, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.7886, 9.8918, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.0547, 10.1558, 10.2565, 10.1855, 10.2856, 10.3853,\n 10.3148, 10.4140, 10.3439, 10.2743, 10.2050, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Two British soldiers have been arrested in the southern Iraq city of Basra, sparking clashes outside a police station where they are being held.\nHypothesis: Two British tanks, sent to the police station where the soldiers are being held, were set alight in clashes.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "63", + "# Tokens in Greenlist": "15", + "Fraction of T in Greenlist": "23.8%", + "z-score": "-0.218", + "p value": "0.586", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.8995, 7.0711, 6.9286, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.2908, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.3152, 8.2035, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.3865, 9.5111,\n 9.4094, 9.3088, 9.4327, 9.5556, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 11.9181, 12.0185, 11.9341, 11.8503, 11.9504, 12.0499,\n 11.9669, 11.8846, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.2467, 12.1677, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.2214, 12.3163, 12.4109, 12.5049, 12.4283,\n 12.5221, 12.4460, 12.3705, 12.4638, 12.3888, 12.4818, 12.4074, 12.3333,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Police in the Lower Austrian town of Amstetten have arrested a 73 year old man who is alleged to have kept his daughter, now aged 42, locked in the cellar of his house in Amstetten since 29th August 1984. The man, identified by police as Josef Fritzl, is alleged to have started sexually abusing his daughter, named as Elisabeth Fritzl, when she was eleven years old, and to have subsequently fathered seven children by her. One of the children, one of a set of twins born in 1996, died of neglect shortly after birth and the body was burned by the father.\nHypothesis: Amstetten is located in Austria.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.2472, -1.3207, -1.0445, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.1013, 0.0504, 0.2010, 0.3504, 0.2993, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.2275, -0.0907, 0.0452, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.1723, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.1355, 6.0125, 6.1828, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.0495, 7.9455, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.7482, 8.8780, 9.0067, 9.1343, 9.0323, 8.9314, 8.8318,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.6732, 9.5795, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.1391, 10.0504, 10.1627, 10.2743, 10.1865, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.1494, 10.0647, 10.1745, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.5217, 11.6219, 11.5414, 11.6412, 11.5613, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.5234, 11.6220, 11.7200, 11.8176, 11.9147, 11.8373,\n 11.7604, 11.8571, 11.9534, 11.8771, 11.8014, 11.7261, 11.8221, 11.9176,\n 11.8429, 11.9380, 11.8638, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 12.0935, 12.0209, 11.9487, 11.8769, 11.9701, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Microsoft for instance, one of the world's largest software companies, lost more than an estimated 500 million dollars last year because of software theft.\nHypothesis: Microsoft would spend 500 million dollars.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, 0.1081, 0.0538, 0.2144, 0.1601, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.1826,\n 0.1365, 0.0907, 0.0452, 0.0000, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, 0.0442, 0.0000, 0.1317, 0.0875, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, 0.0838, 0.0418, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.5597, 6.7213, 6.8810, 7.0387, 7.1945, 7.3485,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.4524, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 7.8113, 7.7026, 7.8444, 7.9849, 7.8779,\n 7.7723, 7.9115, 8.0495, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.9469,\n 8.8529, 8.7600, 8.8833, 9.0057, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.1252, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.7590, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.1167, 10.0342, 10.1429, 10.2509, 10.1692, 10.0881, 10.1955, 10.3024,\n 10.4087, 10.5145, 10.6196, 10.5393, 10.6439, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 10.9422, 10.8673, 10.9669,\n 11.0661, 10.9917, 10.9178, 11.0165, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.2848, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.6179, 11.7120, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Primary prevention of Alzheimer's disease is not possible at present.\nHypothesis: Alzheimer's disease is treated using drugs.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.7454, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.5620,\n -0.4201, -0.2791, -0.1391, 0.0000, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.4021, 0.5345,\n 0.4885, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.4774, 0.6058, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.6810,\n 0.8065, 0.9313, 1.0555, 1.1790, 1.3019, 1.2566, 1.2115, 1.3333,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.3950, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "45.2%", + "z-score": "6.59", + "p value": "2.21e-11", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142, 1.2702, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 2.1831, 2.4351, 2.6811, 2.5560, 2.7952, 3.0290, 2.9055, 2.7852, 3.0123,\n 3.2348, 3.1160, 3.0000, 3.2167, 3.1027, 3.3147, 3.5228, 3.4101, 3.2998,\n 3.1918, 3.0861, 2.9824, 3.1840, 3.0817, 2.9814, 2.8830, 2.7863, 2.6914,\n 2.5981, 2.7928, 2.7005, 2.6098, 2.5205, 2.7107, 2.6222, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.0000, 2.9140, 3.0924, 3.0071, 3.1829, 3.0984, 3.0151,\n 2.9329, 2.8518, 2.7717, 2.9433, 2.8638, 2.7854, 2.7080, 2.6316, 2.5560,\n 2.4814, 2.6485, 2.5743, 2.5011, 2.4286, 2.5927, 2.5207, 2.6828, 2.8433,\n 2.7713, 2.9299, 2.8583, 2.7875, 2.7175, 2.8735, 2.8039, 2.7349, 2.8889,\n 3.0415, 3.1928, 3.1236, 3.0551, 3.2044, 3.3526, 3.2841, 3.4308, 3.3627,\n 3.2953, 3.2285, 3.3731, 3.3066, 3.2408, 3.3838, 3.5256, 3.4599, 3.3947,\n 3.3301, 3.4701, 3.6091, 3.7471, 3.6824, 3.8191, 3.9549, 3.8903, 3.8262,\n 3.9606, 3.8968, 4.0301, 3.9666, 3.9036, 3.8411, 3.9729, 3.9107, 4.0415,\n 4.1713, 4.3004, 4.4286, 4.3661, 4.3042, 4.2426, 4.3695, 4.4956, 4.6209,\n 4.5594, 4.6838, 4.6225, 4.5617, 4.5013, 4.6245, 4.5644, 4.5047, 4.6268,\n 4.7483, 4.6887, 4.8093, 4.7500, 4.8698, 4.9889, 4.9297, 4.8709, 4.9891,\n 5.1066, 5.0479, 5.1647, 5.1063, 5.2223, 5.1642, 5.1064, 5.0489, 5.1640,\n 5.1068, 5.0499, 5.1642, 5.2778, 5.3909, 5.5033, 5.6153, 5.5582, 5.6695,\n 5.7802, 5.8904, 6.0000, 6.1091, 6.2177, 6.3258, 6.2684, 6.3758, 6.4828,\n 6.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Easter is over but that doesn't mean you can't enjoy \"Ben-Hur,\" the granddaddy of biblical epics and winner of a record 11 Oscars, including best picture and best actor (Charlton Heston in the title role).\nHypothesis: Charlton Heston played the title role in the movie \"Ben-Hur\".\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.2%", + "z-score": "5.61", + "p value": "1.03e-08", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.2599, 1.1898, 1.3697, 1.2999, 1.4771,\n 1.6524, 1.8257, 1.9973, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856,\n 2.0517, 1.9829, 2.1470, 2.3094, 2.4703, 2.4010, 2.3324, 2.2646,\n 2.1974, 2.3552, 2.2884, 2.4444, 2.3779, 2.5322, 2.6852, 2.8368,\n 2.7699, 2.9200, 2.8534, 2.7875, 2.9357, 2.8701, 3.0168, 2.9515,\n 3.0967, 3.2408, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.2025, 3.3420, 3.2788, 3.4171, 3.5544, 3.6908, 3.8262, 3.7626,\n 3.6995, 3.6369, 3.7707, 3.7084, 3.8411, 3.7791, 3.9107, 4.0415,\n 4.1713, 4.1092, 4.2381, 4.1763, 4.1150, 4.2426, 4.1816, 4.3083,\n 4.2475, 4.3733, 4.4983, 4.6225, 4.5617, 4.5013, 4.4413, 4.3818,\n 4.5047, 4.4454, 4.5674, 4.5083, 4.6295, 4.7500, 4.8698, 4.8107,\n 4.7520, 4.6938, 4.6359, 4.7544, 4.6968, 4.8146, 4.7572, 4.8742,\n 4.9906, 5.1064, 5.0489, 4.9918, 4.9351, 4.8787, 4.9934, 4.9373,\n 5.0513, 4.9953, 5.1086, 5.2213, 5.3335, 5.2776, 5.3891, 5.3333,\n 5.2779, 5.3887, 5.3335, 5.4436, 5.3886, 5.4981, 5.6070])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.9599, 1.8489, 2.0889, 2.3238, 2.5538, 2.4422, 2.3333,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.7700, 3.6662, 3.8552, 4.0415,\n 3.9386, 4.1219, 4.3026, 4.4809, 4.3788, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.8667, 4.7683, 4.9346, 5.0990, 5.0017, 4.9058,\n 5.0679, 5.2281, 5.1332, 5.0395, 4.9472, 5.1051, 5.2614, 5.4160,\n 5.5691, 5.4772, 5.6286, 5.5377, 5.6875, 5.8358, 5.9827, 5.8926,\n 5.8035, 5.7155, 5.8606, 6.0044, 6.1470, 6.2883, 6.2008, 6.3408,\n 6.4795, 6.3928, 6.3070, 6.4444, 6.5807, 6.4957, 6.4116, 6.3283,\n 6.4632, 6.5970, 6.7298, 6.8615, 6.7788, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.4225, 7.3402, 7.2587, 7.1779, 7.3037, 7.4286, 7.5526,\n 7.6758, 7.5955, 7.7178, 7.8393, 7.9600, 8.0798, 8.1989, 8.1192,\n 8.0402, 7.9619, 8.0801, 8.1976, 8.3143, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.0601, 9.1694, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.4513, 9.3774, 9.4837, 9.5896, 9.6948, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.2419, 10.1690, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.4281, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.7090, 10.6389, 10.7363, 10.8333,\n 10.9299, 11.0261, 10.9564, 11.0521, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: And, despite its own suggestions to the contrary, Oracle will sell PeopleSoft and JD Edwards financial software through reseller channels to new customers.\nHypothesis: Oracle sells financial software.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.9847,\n 1.1628, 1.3389, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.3483, 1.5164, 1.4506, 1.3856, 1.3213, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.5275,\n 1.6837, 1.8385, 1.7767, 1.7154, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.4517, 1.3943, 1.5430, 1.6906, 1.8371,\n 1.7792, 1.7217, 1.8664, 1.8091, 1.7522, 1.8953, 2.0373, 1.9803,\n 1.9237, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.4662, 1.6028, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.7970, 1.9291, 1.8773, 1.8257,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.5848, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.3644, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660, 2.8868,\n 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.8856, 1.7321, 2.0381, 1.8889,\n 1.7457, 1.6082, 1.8974, 2.1776, 2.4495, 2.3116, 2.5744, 2.4398, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.5560, 2.4345, 2.3163, 2.2011, 2.4371, 2.3238,\n 2.2133, 2.4422, 2.3333, 2.2269, 2.1229, 2.0211, 1.9215, 2.1412, 2.3570,\n 2.5690, 2.4689, 2.3706, 2.2743, 2.4804, 2.6833, 2.5873, 2.4930, 2.6914,\n 2.8868, 3.0793, 3.2691, 3.1741, 3.3607, 3.2667, 3.1743, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.0000, 2.9140, 2.8292, 3.0071, 2.9231, 2.8402, 2.7585,\n 2.9329, 2.8518, 3.0237, 2.9433, 2.8638, 2.7854, 2.9542, 3.1211, 3.2863,\n 3.2077, 3.3708, 3.2928, 3.2157, 3.1394, 3.0641, 3.2242, 3.3826, 3.3075,\n 3.2332, 3.1597, 3.3156, 3.2426, 3.1704, 3.3243, 3.2525, 3.1814, 3.1111,\n 3.0415, 2.9726, 3.1236, 3.2733, 3.4217, 3.5689, 3.4995, 3.4308, 3.5762,\n 3.5079, 3.4402, 3.3731, 3.5166, 3.6590, 3.5920, 3.5256, 3.4599, 3.6004,\n 3.7399, 3.6742, 3.6091, 3.7471, 3.8841, 4.0202, 4.1552, 4.0898, 4.2237,\n 4.1586, 4.0941, 4.2267, 4.1625, 4.0988, 4.0356, 3.9729, 3.9107, 3.8490,\n 3.9795, 3.9181, 3.8571, 3.7966, 3.9258, 3.8655, 3.9936, 3.9337, 3.8741,\n 3.8150, 3.9418, 4.0678, 4.1931, 4.1338, 4.2582, 4.1992, 4.1406, 4.0825,\n 4.0247, 4.1477, 4.2699, 4.2122, 4.1549, 4.0980, 4.2191, 4.1624, 4.1061,\n 4.2262, 4.1700, 4.1143, 4.0589, 4.0038, 3.9491, 4.0678, 4.1859, 4.3033,\n 4.2485, 4.1940, 4.1399, 4.2563, 4.3721, 4.3180, 4.2642, 4.3792, 4.4936,\n 4.6074, 4.7206, 4.6667, 4.7792, 4.7255, 4.6720, 4.7838, 4.7305, 4.6775,\n 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The United Kingdom, Turkey, \"Old\" British commonwealth forces ( New Zealand, Aussies, Canadians, Gurkhas, India), Germany, Poland, Vietnam, some units from USA, Russia, France, China (but only some) Japan would be included but I'm uncomfortable about using the word respected for Japanese soldiers for obvious reasons.\nHypothesis: The Gurkhas come from Nepal.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "62", + "Fraction of T in Greenlist": "31.2%", + "z-score": "2.01", + "p value": "0.0225", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 1.0510, 0.9608, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 0.8003, 0.7395, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.2276, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.5681, 1.7179, 1.8665, 1.8074, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.1882,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.4335, 1.5714, 1.7085, 1.6554, 1.7913, 1.9263, 1.8732,\n 2.0071, 2.1401, 2.0868, 2.2188, 2.1656, 2.2966, 2.2436, 2.3735,\n 2.3206, 2.2680, 2.2159, 2.1640, 2.1125, 2.0613, 2.0105, 1.9599,\n 1.9097, 1.8598, 1.9868, 1.9370, 2.0631, 2.0134, 2.1385, 2.0889,\n 2.0396, 1.9906, 2.1145, 2.0656, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.8999, 2.0212, 1.9738, 1.9267, 1.8799, 2.0000,\n 1.9533, 1.9068, 2.0259, 1.9795, 1.9333, 2.0515, 2.0054])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "130", + "Fraction of T in Greenlist": "65.3%", + "z-score": "13.1", + "p value": "1e-39", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 1.1793, 1.4757, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 3.7009, 3.5753, 3.7897, 3.6667,\n 3.8765, 4.0825, 3.9614, 4.1633, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.9438, 6.1118, 5.9954, 5.8812, 5.7689, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 8.0822, 8.2178, 8.1152, 8.0139,\n 8.1483, 8.2816, 8.1816, 8.0829, 8.2151, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.0987, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.0261, 10.1391, 10.2514, 10.1627, 10.2743, 10.3853, 10.4956,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.2857, 11.2001, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.4450, 11.3616, 11.4638, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.6412, 11.7405, 11.8393, 11.9377,\n 12.0355, 12.1329, 12.2298, 12.1502, 12.0712, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.5495, 12.6439, 12.7378, 12.8313, 12.7532, 12.6757, 12.5986,\n 12.6918, 12.6153, 12.5394, 12.4638, 12.5568, 12.6494, 12.7416, 12.8333,\n 12.9247, 12.8499, 12.7756, 12.8667, 12.9574, 13.0477, 13.1377])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Ssangyong Motor was taken over by creditors after it collapsed under heavy debts during the 1997-98 Asian financial crisis.\nHypothesis: Asian financial crisis takes over Ssangyong Motor\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.9258,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 1.1648, 1.3608,\n 1.5542, 1.4765, 1.4000, 1.3245, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.8378, 1.7638, 1.6908, 1.6187, 1.7951, 1.9695,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.9829, 1.9149, 1.8475, 1.7809, 1.9437, 2.1049, 2.2646,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.0548, 1.9920, 1.9298, 2.0817, 2.2323, 2.3817, 2.3190,\n 2.2569, 2.1954, 2.1344, 2.0739, 2.0140, 2.1602, 2.1005, 2.0412,\n 1.9825, 2.1268, 2.0682, 2.0101, 1.9524, 2.0948, 2.2361, 2.3764,\n 2.3183, 2.2608, 2.2037, 2.1470, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.2535, 2.3891, 2.3333, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 2.1401, 2.0868, 2.0339, 1.9813, 2.1128, 2.0604, 2.0083,\n 1.9566, 2.0866, 2.2159, 2.3443, 2.2923, 2.2406, 2.1892, 2.3163,\n 2.2650, 2.3912, 2.3400, 2.2892, 2.2387, 2.3635, 2.3131, 2.2630,\n 2.2132, 2.1637, 2.1145, 2.0656, 2.1886, 2.1398, 2.0913, 2.0430,\n 2.1648, 2.1167, 2.2377, 2.3580, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.4520, 2.4042, 2.5220, 2.4744, 2.4269, 2.5439, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 1.1793, 1.0541, 1.3480, 1.2247,\n 1.1055, 1.3862, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.8868, 2.7761, 2.9913, 3.2026, 3.4101, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 4.2528, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 4.0024, 3.9056, 4.0825,\n 3.9869, 3.8927, 4.0667, 3.9736, 3.8819, 4.0531, 4.2222, 4.1312,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.1957, 4.1090, 4.2710, 4.1851,\n 4.1003, 4.0166, 3.9340, 3.8523, 4.0112, 3.9302, 3.8503, 3.7712,\n 3.6931, 3.6159, 3.7717, 3.9260, 3.8490, 4.0016, 4.1528, 4.0762,\n 4.0004, 3.9254, 3.8512, 4.0000, 4.1475, 4.2938, 4.2196, 4.1461,\n 4.0734, 4.0015, 4.1455, 4.2885, 4.4302, 4.5708, 4.7104, 4.8488,\n 4.9862, 5.1225, 5.2578, 5.1848, 5.3189, 5.4521, 5.5842, 5.7155,\n 5.8458, 5.9752, 6.1036, 6.2312, 6.3580, 6.4838, 6.6088, 6.7330,\n 6.6591, 6.7823, 6.9048, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.8988, 8.0139, 7.9403, 8.0546,\n 8.1683, 8.2813, 8.3937, 8.5054, 8.6165, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.7278, 8.6560, 8.7646, 8.6933, 8.6226, 8.5524,\n 8.4826, 8.4133, 8.5212, 8.4523, 8.3840, 8.3161, 8.2486, 8.1817,\n 8.2887, 8.3952, 8.5012, 8.6066, 8.7116, 8.8160, 8.9199, 9.0233,\n 8.9565, 9.0593, 8.9929, 9.0952, 9.0292, 8.9635, 8.8982, 9.0000,\n 9.1013, 9.2022, 9.3026, 9.2376, 9.1730, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: According to officials, Jo\u00e3o Bernardo Vieira, the president of Guinea-Bissau, was shot to death on Monday in his palace by renegade soldiers. \"President Vieira was killed by the army as he tried to flee his house which was being attacked by a group of soldiers close to the chief of staff Tagme Na Waie, early this morning,\" Zamora Induta, a military spokesman, said to Agence France-Presse, insisting that \"this was not a coup d'etat.\"\nHypothesis: Bernardo Vieira was the president of Guinea-Bissau.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.6641, -1.7049, -1.7454, -1.6025, -1.4606,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.2879, -1.1513, -1.0155, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -1.1163, -1.1547, -1.0284, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.6531, 6.8229, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.0928, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.0000, 9.8987, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.7678, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.5156, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.6510, 11.7543, 11.8571, 11.9594, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.0185, 12.9363, 12.8546, 12.9491, 13.0431, 13.1367, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.5408, 13.6313, 13.7215, 13.8113, 13.7327, 13.8222, 13.9113, 14.0000,\n 14.0884, 14.0106, 14.0986, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Asprin, an inexpensive drug helps protect survivors of heart attack and stroke from subsequent heart attacks and death, and even helps reduce the number of deaths that occur within the first hours following a heart attack.\nHypothesis: People experienced adverse effects while taking aspirin.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, 0.1037, 0.0000,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.9169, 1.1202, 1.3206, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.4000, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.4472, 0.5941,\n 0.7399, 0.6881, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.9623,\n 1.1028, 1.0512, 1.0000, 0.9492, 0.8987, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 1.1094, 1.2441, 1.1942, 1.1447, 1.0954,\n 1.0465, 1.1794, 1.1305, 1.2623, 1.3933, 1.3443, 1.4743, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.4857, 1.4393, 1.3933, 1.3474, 1.3019, 1.4241, 1.5457, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.5592, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 2.9055, 2.7852, 2.6681, 2.8943, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.3614, 4.5461, 4.7281, 4.9075,\n 4.7980, 4.9747, 5.1490, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.4993, 6.4008,\n 6.5483, 6.6944, 6.8391, 6.7416, 6.8849, 6.7886, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.6867, 7.5967, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.2733, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.0497, 8.1731, 8.0882, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.2858, 8.4057, 8.5249, 8.4423, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.7033, 8.6241, 8.5456, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.8039,\n 8.9151, 9.0257, 8.9502, 8.8752, 8.8008, 8.9107, 8.8369, 8.9461,\n 9.0548, 8.9815, 8.9086, 9.0167, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.1932, 9.2990, 9.4042, 9.3328, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.3439, 10.2743, 10.2050, 10.3038, 10.2350, 10.3333,\n 10.4312, 10.3628, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Leakey believed Kenya's rich wildlife, which underpins a tourist industry worth Dollars 450m (Pounds 308m) a year, could be managed in a profitable and sustainable manner.\nHypothesis: African countries encourage keeping animals alive to attract tourists.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.3859, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.2817, -1.3197, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 4.0825, 3.9614, 4.1633, 4.0446, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.3100, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.3578, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.3463, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.6667, 8.7927, 8.9178, 8.8227, 8.7287,\n 8.8529, 8.9763, 8.8833, 9.0057, 9.1273, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.5133, 9.6307, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.6827, 11.7851, 11.8870, 11.9883,\n 12.0891, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.4998, 12.5976,\n 12.5129, 12.6103, 12.7073, 12.8037, 12.8997, 12.9952, 13.0903, 13.0067,\n 13.1014, 13.1957, 13.2895, 13.3829, 13.4758, 13.5683, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.9427, 14.0329, 13.9515, 14.0414, 14.1309,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 14.8462, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Without a natural greenhouse effect, the temperature of the Earth would be about zero degrees F (-18C) instead of its present 57F (14C).\nHypothesis: Greenhouse effect changes global climate.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "78", + "Fraction of T in Greenlist": "39.2%", + "z-score": "4.62", + "p value": "1.87e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.4142, 1.2702, 1.5852, 1.8889,\n 1.7457, 1.6082, 1.8974, 2.1776, 2.0412, 1.9096, 2.1783, 2.4398, 2.3094,\n 2.1831, 2.4351, 2.3113, 2.5560, 2.7952, 2.6726, 2.5533, 2.4371, 2.3238,\n 2.2133, 2.1054, 2.0000, 1.8970, 1.7963, 2.0211, 1.9215, 2.1412, 2.3570,\n 2.2576, 2.1602, 2.0647, 1.9711, 2.1798, 2.0870, 1.9959, 1.9064, 2.1094,\n 2.0207, 1.9335, 2.1320, 2.3276, 2.5205, 2.4327, 2.6222, 2.8093, 2.9938,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.0924, 3.0071, 2.9231, 3.0984, 3.2717,\n 3.1879, 3.1052, 3.0237, 3.1937, 3.3619, 3.2806, 3.2004, 3.1211, 3.2863,\n 3.4498, 3.3708, 3.2928, 3.2157, 3.1394, 3.0641, 2.9897, 3.1493, 3.0754,\n 3.2332, 3.1597, 3.0870, 3.0151, 3.1704, 3.3243, 3.4768, 3.4047, 3.3333,\n 3.2627, 3.1928, 3.3428, 3.4915, 3.4217, 3.3526, 3.4995, 3.4308, 3.3627,\n 3.5079, 3.6519, 3.5839, 3.5166, 3.6590, 3.8002, 3.7330, 3.6664, 3.8061,\n 3.7399, 3.8784, 4.0158, 3.9497, 4.0859, 4.0202, 3.9549, 3.8903, 3.8262,\n 3.7626, 3.6995, 3.6369, 3.7707, 3.7084, 3.8411, 3.9729, 3.9107, 3.8490,\n 3.9795, 4.1092, 4.2381, 4.1763, 4.1150, 4.0541, 4.1816, 4.1210, 4.0608,\n 4.1872, 4.3128, 4.2527, 4.1931, 4.1338, 4.0750, 4.0166, 3.9586, 4.0825,\n 4.0247, 4.1477, 4.0901, 4.0330, 3.9762, 4.0980, 4.2191, 4.3395, 4.4593,\n 4.4023, 4.5212, 4.4644, 4.5826, 4.7001, 4.8170, 4.7602, 4.7037, 4.6476,\n 4.7635, 4.8787, 4.8227, 4.7670, 4.7117, 4.8260, 4.9397, 4.8845, 4.8295,\n 4.7749, 4.7206, 4.6667, 4.6130, 4.7255, 4.6720, 4.7838, 4.7305, 4.6775,\n 4.6248])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "151", + "Fraction of T in Greenlist": "75.9%", + "z-score": "16.6", + "p value": "5.24e-62", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 5.3072,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.8718, 7.0456, 7.2168, 7.3853, 7.5514, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 8.7599, 8.8991, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.1479, 10.0385, 9.9304, 10.0535, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.2061, 10.3257, 10.4444, 10.5623, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.7211, 10.6218, 10.5236, 10.4263, 10.5409,\n 10.4448, 10.5587, 10.6719, 10.7843, 10.8960, 11.0070, 11.1172, 11.2268,\n 11.3357, 11.2414, 11.3497, 11.4574, 11.5645, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.4086, 13.5039, 13.5987, 13.6931,\n 13.6050, 13.6990, 13.7926, 13.8857, 13.9784, 14.0707, 14.1625, 14.2539,\n 14.3449, 14.4355, 14.5257, 14.6155, 14.7049, 14.7939, 14.8825, 14.9707,\n 15.0585, 15.1460, 15.2331, 15.3198, 15.2345, 15.3210, 15.4071, 15.4929,\n 15.4085, 15.4940, 15.5792, 15.6640, 15.7485, 15.8327, 15.9165, 16.0000,\n 16.0832, 16.1660, 16.2486, 16.3308, 16.4127, 16.4943, 16.5755])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: NASA's two Great Observatories, the Hubble Space Telescope and the Chandra X-ray Observatory, have independently provided what could be the best direct evidence yet for the existence of an event horizon, the defining feature of a black hole.\nHypothesis: Hubble discovers black holes.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, 0.2085, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.1629, 0.3244, 0.4845, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, 0.1502, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.2949, 0.4407, 0.3904, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.3225, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.2568, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 1.9096, 1.7823, 1.6590, 1.9245, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 3.0929, 2.9856, 2.8804, 3.0861,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.5642, 3.7528,\n 3.6522, 3.8376, 3.7383, 3.9208, 3.8228, 3.7264, 3.9056, 3.8103,\n 3.7166, 3.6242, 3.8000, 3.9736, 4.1451, 4.3146, 4.2222, 4.3894,\n 4.5547, 4.7181, 4.8797, 5.0395, 5.1977, 5.3541, 5.5090, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.5377, 5.6875, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.6559, 6.5672,\n 6.4795, 6.3928, 6.3070, 6.2222, 6.3595, 6.4957, 6.6308, 6.7648,\n 6.8977, 7.0296, 7.1605, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.2858, 8.4057, 8.3231, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.7908, 9.8995, 9.8197, 9.7405,\n 9.6619, 9.5840, 9.5066, 9.6148, 9.7224, 9.8293, 9.7526, 9.6764,\n 9.7828, 9.7072, 9.6322, 9.7380, 9.8433, 9.7688, 9.6948, 9.7996,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.1398, 10.2419, 10.1690, 10.2706,\n 10.1981, 10.2993, 10.3999, 10.3280, 10.4281, 10.3566, 10.2856, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.6111, 10.7090, 10.8064, 10.9034, 10.8333,\n 10.7637, 10.8602, 10.9564, 11.0521, 10.9829, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: For US pharmaceutical companies, the impending healthcare reforms promise an era of increased cost-containment and pricing controls.\nHypothesis: The US government wants to keep drug prices down.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -2.1866, -1.9604, -2.0156, -2.0702, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -1.8257, -1.8762, -1.9262, -1.9757, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -2.1372, -1.9640,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.8098,\n -2.6605, -2.6984, -2.7361, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.6681, -2.7050, -2.5620, -2.5990, -2.6359, -2.4944,\n -2.5315, -2.5683, -2.6049, -2.6414, -2.6776, -2.7137, -2.5750, -2.6112,\n -2.6472, -2.6830, -2.5460, -2.5820, -2.6178, -2.6534, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.7358, -2.7701, -2.8043, -2.6737, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 1.5492, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.3618, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.4413, 6.6030, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.5653, 7.7047, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.5818, 7.7174, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.3164, 8.2222, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.8228, 8.7327, 8.6436,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.6238, 8.7439, 8.6581, 8.7773,\n 8.8958, 8.8108, 8.7267, 8.6433, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.7482, 8.8636, 8.9783, 8.8978, 8.8179, 8.9319, 9.0452,\n 8.9660, 9.0786, 9.1905, 9.3017, 9.2232, 9.3338, 9.4438, 9.3659,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.2790, 10.2029, 10.3065, 10.4097, 10.3341,\n 10.4367, 10.3617, 10.4638, 10.3893, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 10.9431, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.3091, 11.4047, 11.3333,\n 11.2624, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Giuliana Sgrena was freed from captivity in Iraq today. The Italian journalist was abducted in Baghdad exactly a month ago while she had been reporting for Il Manifesto. The Islamic Jihad Organisation had taken her hostage on the 4th February with the demand that Italy withdraw all of its troops from Iraq. On February 16, a video was released with a clearly distraught Mrs Sgrena begging for Italy to withdraw from Iraq. However despite the video, the Italian Senate voted to extend its miltary forces' stay in Iraq.\nHypothesis: A female journalist was kidnapped in Iraq.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.3120, -2.3564, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.6550, -2.6961, -2.7369, -2.5717, -2.6128, -2.4495,\n -2.2875, -2.3293, -2.3708, -2.4121, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.0224, -2.0642, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.9263, -2.9611, -2.9957, -3.0302, -3.0645,\n -3.0987, -2.9633, -2.8287, -2.8633, -2.8977, -2.9320, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.9355, -2.9692, -3.0028, -2.8721, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.5176, 3.7626, 3.6108, 3.4641, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.6082, 4.8038, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.1118, 5.9954, 5.8812, 6.0469, 5.9346, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.9874, 5.8835, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.6469,\n 6.7931, 6.9378, 7.0812, 6.9824, 7.1243, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.6210, 7.5258, 7.4316, 7.5661, 7.4730,\n 7.3810, 7.5143, 7.4233, 7.5556, 7.4655, 7.5967, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.2956, 8.2107, 8.1266, 8.2483, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.4423, 8.5607, 8.4788, 8.5964, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.7831, 8.8978, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.3979, 9.3212, 9.2450, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.9184, 10.0231, 10.1273, 10.0523, 9.9778,\n 9.9038, 9.8303, 9.7574, 9.8611, 9.9642, 10.0668, 9.9944, 9.9224,\n 10.0245, 9.9531, 9.8821, 9.9837, 10.0848, 10.0143, 10.1149, 10.2151,\n 10.3148, 10.2447, 10.3439, 10.4427, 10.3730, 10.4713, 10.4021, 10.3333,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.4893, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: About 33.5 million people live in this massive conurbation. I would guess that 95% of the 5,000 officially foreign-capital firms in Japan are based in Tokyo.\nHypothesis: About 33.5 miilion people live in Tokyo.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "72", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.64", + "p value": "0.000135", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774, 0.9802, 1.3608,\n 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714, 0.8083, 0.6794, 1.0000,\n 1.3093, 1.1793, 1.0541, 1.3480, 1.6330, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.0441, 0.9366, 1.2019, 1.0954, 1.3525, 1.2472, 1.1446, 1.0445, 0.9467,\n 0.8513, 0.7579, 1.0000, 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 1.0999,\n 1.0120, 0.9258, 1.1471, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 1.2366,\n 1.1547, 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164, 0.7057,\n 0.6376, 0.8238, 1.0079, 0.9393, 1.1206, 1.2999, 1.4771, 1.4076, 1.5823,\n 1.5131, 1.6854, 1.6164, 1.5483, 1.4809, 1.4142, 1.5828, 1.5164, 1.4506,\n 1.3856, 1.3213, 1.4863, 1.6498, 1.8116, 1.7467, 1.9066, 1.8419, 1.7778,\n 1.7143, 1.6514, 1.5892, 1.5275, 1.6837, 1.6222, 1.7767, 1.7154, 1.6547,\n 1.8071, 1.9582, 2.1082, 2.0470, 2.1954, 2.1344, 2.0739, 2.0140, 1.9545,\n 1.8956, 2.0412, 1.9825, 2.1268, 2.0682, 2.0101, 2.1527, 2.2943, 2.4348,\n 2.3764, 2.5156, 2.4574, 2.5954, 2.5373, 2.4797, 2.4225, 2.5589, 2.5019,\n 2.4453, 2.3891, 2.3333, 2.4678, 2.6014, 2.7341, 2.6781, 2.8098, 2.7539,\n 2.6984, 2.6433, 2.5886, 2.5343, 2.6640, 2.6099, 2.5560, 2.6846, 2.6309,\n 2.7585, 2.8853, 3.0114, 2.9575, 3.0827, 3.0290, 2.9756, 3.0997, 3.0464,\n 2.9935, 2.9410, 3.0638, 3.0114, 2.9593, 3.0811, 3.0292, 3.1502, 3.2705,\n 3.3902, 3.3381, 3.4570, 3.4050, 3.3534, 3.4713, 3.4198, 3.3686, 3.3177,\n 3.4346, 3.3838, 3.3333, 3.2831, 3.2332, 3.3489, 3.4641, 3.5787, 3.5286,\n 3.6425])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.5260, 4.7336, 4.9373, 4.8003, 5.0000,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.1355, 6.3058, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 6.2505, 6.1450, 6.0410, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.3233, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.4444, 8.3503, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.2348, 10.3445, 10.2592, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.1692, 10.2766, 10.3835, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.3249, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.8014, 11.7261, 11.6514, 11.7473,\n 11.8429, 11.9380, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.3525, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Tropical Storm Irene on August 11, 2005 at 16:15 UTC. Tropical Storm Irene will increase in strength over the next several days, possibly developing into a hurricane that will hit the east coast of the United States, said the National Hurricane Center of Miami, Florida in a report today. Irene was located approximately 975 kilometers south-southeast of Bermuda at 16:00 UTC today. Forecasters say that the storm is now moving in a west- northwest direction with top sustained winds of 40 miles per hour.\nHypothesis: A storm called Irene is going to approach the east coast of the US.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.1333, 1.3245, 1.2501, 1.1767, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.2611, 1.4071, 1.5519, 1.4963, 1.4410, 1.3862,\n 1.3318, 1.4744, 1.6160, 1.7566, 1.7018, 1.6473, 1.7864, 1.9245,\n 2.0617, 2.1980, 2.3333, 2.4678, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.3262, 2.4578, 2.5886, 2.5343, 2.4803, 2.4267, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.6519, 2.5990, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.4653, 2.4142, 2.5386, 2.6623, 2.6112,\n 2.5604, 2.5099, 2.4597, 2.5820, 2.7036, 2.8245, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.7443, 2.8633, 2.8137, 2.7644, 2.7154, 2.6667,\n 2.6182, 2.5700, 2.6874, 2.6393, 2.5915, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "95", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "67.4%", + "z-score": "9.54", + "p value": "7.36e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.7543, 6.9570, 7.1550, 6.9402,\n 7.1358, 7.3271, 7.5144, 7.3131, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.3423, 8.1689, 8.3333,\n 8.1654, 8.3283, 8.1654, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.5010, 8.3589, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.6948, 8.8389, 8.7093,\n 8.8522, 8.7250, 8.8667, 8.7419, 8.8823, 9.0213, 8.8991, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.0711, 8.9550, 9.0896, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.4088, 9.5368])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: One of the dead was a child, passing by with his parents, said Iqrar Abbasi, a doctor at Civil Hospital Karachi.\nHypothesis: A doctor was killed by his parents.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.4190, -1.4765, -1.5333, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.7937, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 5.7664, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.7269, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.1909, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.4146, 7.3073, 7.4521, 7.3464, 7.2421, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.5818, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.3813, 8.5030, 8.4173, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.4057, 8.5249, 8.4423, 8.5607, 8.6783, 8.5964, 8.5153,\n 8.6321, 8.7482, 8.8636, 8.7831, 8.8978, 9.0117, 8.9319, 8.8527,\n 8.7742, 8.8874, 9.0000, 8.9221, 9.0340, 8.9567, 8.8800, 8.9912,\n 8.9151, 8.8396, 8.9502, 9.0601, 9.1694, 9.0944, 9.2032, 9.3113,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.3328, 9.4375, 9.5416, 9.6452, 9.5743,\n 9.5038, 9.6069, 9.5369, 9.4673, 9.3982, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.9374, 10.0371, 10.1363, 10.2350, 10.1667,\n 10.0987, 10.1970, 10.1295, 10.2273, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Cyrillic alphabet is an alphabet used for several East and South Slavic languages; (Belarusian, Bulgarian, Macedonian, Russian, Rusyn, Serbian, and Ukrainian) and many other languages of the former Soviet Union, Asia and Eastern Europe. It has also been used for other languages in the past. Not all letters in the Cyrillic alphabet are used in every language which is written with it.\nHypothesis: Cyrillic is an alphabet used for certain Slavic languages, such as Russian.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.0793, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.3303, -0.1644, 0.0000,\n 0.1629, 0.1081, 0.2692, 0.4288, 0.3736, 0.5315, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.8296, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.5507, 0.4988, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.4174, 0.3698, 0.5069, 0.4593, 0.4121, 0.5477,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.5991, 0.7255, 0.8513,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.7979, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.9119, 1.0336, 1.1547, 1.1106, 1.0668, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "181", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "49.7%", + "z-score": "7.68", + "p value": "7.85e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.0381, 2.3333, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.9055, 2.7852, 2.6681, 2.5538, 2.4422, 2.6667,\n 2.5568, 2.4495, 2.6679, 2.8823, 3.0929, 3.2998, 3.1918, 3.0861,\n 3.2883, 3.4873, 3.6831, 3.5777, 3.4743, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 3.9208, 4.1008, 4.0024, 4.1797, 4.0825,\n 4.2571, 4.4296, 4.3333, 4.5034, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.0679, 4.9731, 5.1332, 5.0395, 5.1977, 5.1051, 5.0138, 4.9237,\n 4.8347, 4.9904, 4.9023, 4.8154, 4.9691, 4.8830, 5.0350, 4.9497,\n 5.1000, 5.0156, 5.1643, 5.3116, 5.4576, 5.6023, 5.5181, 5.6614,\n 5.8034, 5.9442, 5.8605, 5.7778, 5.9171, 5.8351, 5.9732, 5.8919,\n 5.8114, 5.7318, 5.6530, 5.5750, 5.7112, 5.8464, 5.9806, 5.9029,\n 5.8260, 5.9589, 6.0908, 6.2217, 6.1451, 6.0693, 6.1990, 6.3278,\n 6.4558, 6.5828, 6.5072, 6.4322, 6.3580, 6.4838, 6.4101, 6.5350,\n 6.4618, 6.5857, 6.7089, 6.8313, 6.9529, 6.8799, 7.0007, 7.1207,\n 7.2399, 7.1673, 7.0952, 7.2136, 7.1421, 7.2596, 7.1886, 7.1181,\n 7.0481, 6.9786, 7.0952, 7.0262, 6.9577, 7.0735, 7.0054, 7.1204,\n 7.0527, 7.1670, 7.0998, 7.2134, 7.3263, 7.4386, 7.5504, 7.4833,\n 7.5944, 7.7048, 7.8147, 7.7480, 7.6816, 7.7908, 7.7249, 7.8335,\n 7.7679, 7.7028, 7.6381, 7.5738, 7.6816])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: In November 1990, the president announced that opposition political parties would be permitted to organize in 1991. Several new parties emerged, including the Democratic Republican Movement (MDR), the Liberal Party (LP), the Democratic and Socialist Party (PSD), and the Coalition for the Defense of the Republic (CDR).\nHypothesis: Several new political parties emerged.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.4495,\n -2.5126, -2.5744, -2.6349, -2.6943, -2.7526, -2.8098, -2.8660, -2.5560,\n -2.2542, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.2998, -2.0428, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.1320, -1.9044, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.0656,\n -1.8604, -1.6577, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.0247, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.0000, -1.8249, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -1.9803,\n -1.8251, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -1.9799, -1.8324, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -1.9906, -1.8556, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.0000,\n -1.8701, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 5.9017, 6.0849, 5.9479, 5.8140, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.1355, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 8.8007, 8.9324, 9.0629, 8.9567,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.4327, 9.5556, 9.6775, 9.5784, 9.4803, 9.6016,\n 9.5047, 9.6251, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.0698, 10.1840, 10.0926, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.5841, 10.6936,\n 10.8025, 10.7141, 10.6265, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.2589, 11.3616, 11.4638, 11.5655, 11.6666, 11.5841, 11.6847,\n 11.7849, 11.7031, 11.8028, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.4384, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 12.8928, 12.8169, 12.9087, 13.0000,\n 13.0910, 13.1815, 13.2717, 13.1966, 13.2864, 13.3759, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A closely divided U.S. Supreme Court said on Thursday its 2002 ruling that juries and not judges must impose a death sentence applies only to future cases, a decision that may affect more than 100 death row inmates.\nHypothesis: The Supreme Court decided that only judges can impose the death sentence.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "10", + "Fraction of T in Greenlist": "12.0%", + "z-score": "-2.73", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.4910, -2.5556, -2.6186, -2.6803, -2.7406, -2.7998, -2.8577,\n -2.9146, -2.9704, -3.0253, -3.0792, -3.1322, -3.1844, -3.2358, -3.2863,\n -3.3362, -3.3853, -3.4338, -3.1334, -3.1845, -3.2348, -3.2844, -3.3333,\n -3.3816, -3.4293, -3.4763, -3.5228, -3.2515, -3.2998, -3.0361, -3.0861,\n -3.1353, -3.1840, -3.2321, -3.2796, -3.3265, -3.3729, -3.4187, -3.1754,\n -3.2225, -3.2691, -3.0330, -3.0806, -3.1277, -2.8983, -2.6722, -2.7217,\n -2.7705, -2.8189, -2.8667, -2.9140, -2.9608, -3.0071, -3.0530, -3.0984,\n -2.8868, -2.6778, -2.7250])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.1831, 2.4351, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.8823, 2.7757, 2.9856, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.9814, 3.1787, 3.0796, 3.2733, 3.4641,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.8228, 3.7264, 3.9056, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.9736, 3.8819, 4.0531, 3.9624, 4.1312,\n 4.0415, 3.9530, 3.8657, 3.7796, 3.9452, 4.1090, 4.2710, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.8154, 4.7296, 4.6448, 4.7980, 4.9497,\n 5.1000, 5.0156, 5.1643, 5.3116, 5.4576, 5.6023, 5.7457, 5.8878,\n 6.0288, 6.1685, 6.0838, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.0837, 6.0038, 5.9247, 5.8464, 5.9806, 6.1137,\n 6.2459, 6.1680, 6.2990, 6.4291, 6.5583, 6.6865, 6.6089, 6.5320,\n 6.6591, 6.7854, 6.9107, 6.8343, 6.9587, 7.0823, 7.0063, 7.1291,\n 7.2510, 7.1755, 7.1007, 7.0265, 6.9529, 7.0737, 7.1938, 7.3131,\n 7.2399, 7.3584, 7.4762, 7.5933, 7.7096, 7.6368, 7.5644, 7.6800,\n 7.7949, 7.9091, 7.8372, 7.9507, 8.0636, 7.9921, 8.1043, 8.2158,\n 8.1448, 8.0742, 8.0042, 7.9347, 8.0455, 8.1556, 8.2652, 8.1960,\n 8.3050, 8.4133, 8.5212, 8.6284, 8.5595, 8.4911, 8.5978, 8.7039,\n 8.8094, 8.7414, 8.8464, 8.9509, 8.8832, 8.9872, 9.0906, 9.0233,\n 8.9565, 8.8900, 8.8240, 8.9268, 9.0292, 9.1310, 9.0653, 9.1667,\n 9.2676, 9.3680, 9.4680, 9.4026, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Disneyland of Europe is located 20 miles east of Paris. Euro Disney is a huge complex with hotels, restaurants, shops and a nearby golf course apart from the actual theme park. It provides a much better experience than its American cousin thanks to the marvels of modern engineering.\nHypothesis: Euro-Disney is an Entertainment Park.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.2222, -0.0553, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.5680, 0.5143, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.6825, 0.8165, 0.9497, 0.9017, 1.0338, 1.1651, 1.1169, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.3131, 1.2657, 1.2185,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.3303, 1.2839, 1.2377, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 5.9186, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.4790, 7.3467, 7.5056,\n 7.6624, 7.8174, 7.9704, 8.1216, 8.2711, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.9935, 9.1333, 9.0068, 8.8823, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.6470, 9.7738, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.0855, 9.9817, 9.8792, 10.0000, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.7678, 10.8801, 10.9917, 11.1026, 11.0070, 10.9123, 10.8186,\n 10.7257, 10.6338, 10.7444, 10.8544, 10.9637, 11.0724, 10.9816, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.5156, 11.6206, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.7543, 11.8571, 11.7696, 11.8719, 11.9737, 12.0749, 11.9883,\n 11.9024, 12.0032, 12.1036, 12.2034, 12.3027, 12.4015, 12.3167, 12.4150,\n 12.3309, 12.2474, 12.1646, 12.2627, 12.1805, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.9891, 13.0821, 13.1746, 13.2668, 13.1878, 13.2796,\n 13.3710, 13.4620, 13.3838, 13.3060, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.7801, 13.7042, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The chaotic situation unleashed in Bogota last night, with the assasination of Justice Carlos Valencia, began on 28 July in Medellin, when motorized paid assasins murdered third public order Judge Maria Elena Diaz.\nHypothesis: Justice Carlos Valencia was killed in Medellin.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.4631, -1.5323, -1.6001, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.6278, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.3472,\n -1.3904, -1.4335, -1.2857, -1.1390, -0.9933, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.3035, -1.1651, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660, 3.6566,\n 3.2863, 2.9593, 2.6667, 3.0424, 2.7775, 3.1305, 3.4641, 3.7808, 3.5382,\n 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426, 4.0415, 4.3027, 4.1111,\n 3.9279, 4.1812, 4.4272, 4.6663, 4.4907, 4.7237, 4.9507, 4.7819, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.1121, 5.3199, 5.5234, 5.3708, 5.2223, 5.4222,\n 5.2778, 5.1371, 5.0000, 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.2463, 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009,\n 5.7735, 5.9438, 5.8275, 5.9954, 5.8812, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968, 6.3509,\n 6.5033, 6.4006, 6.5514, 6.4501, 6.3502, 6.4993, 6.6469, 6.7931, 6.6944,\n 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068, 7.2104, 7.1152, 7.2532,\n 7.3901, 7.2960, 7.2029, 7.3386, 7.2466, 7.1556, 7.0657, 7.2001, 7.1111,\n 7.2443, 7.1563, 7.0692, 7.2012, 7.3322, 7.2459, 7.1605, 7.2904, 7.2058,\n 7.1220, 7.0391, 7.1678, 7.0857, 7.2134, 7.1319, 7.0513, 7.1779, 7.3037,\n 7.2236, 7.1443, 7.2691, 7.1904, 7.1125, 7.0353, 7.1590, 7.0823, 7.2051,\n 7.1291, 7.0537, 7.1755, 7.2966, 7.2217, 7.1474, 7.2675, 7.1938, 7.1207,\n 7.0481, 7.1673, 7.0952, 7.2136, 7.1421, 7.0711, 7.1886, 7.3054, 7.2348,\n 7.1647, 7.2807, 7.2111, 7.1420, 7.0735, 7.1885, 7.1204, 7.2348, 7.1670,\n 7.0998, 7.2134, 7.3263, 7.2594, 7.1929, 7.3051, 7.2391, 7.1735, 7.1083,\n 7.2197, 7.1549, 7.2656, 7.2012, 7.1372, 7.2472, 7.3566, 7.2929, 7.2296,\n 7.3383, 7.2753, 7.2127, 7.1506, 7.2585, 7.1967, 7.3041, 7.2425, 7.1813,\n 7.2881, 7.3943, 7.3333, 7.2728, 7.3783, 7.3180, 7.2581, 7.1985, 7.3034,\n 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The medical student accused of murdering an erotic masseuse he met on Craigslist is drowning in more than $100,000 in student loan debt and is so broke he can't afford to pay an attorney, according to court papers. Philip Markoff, a 23-year-old suspended Boston University medical school student, owes $130,000 in student loans and does not get money from his parents, leaving him to lean on a taxpayer-funded attorney for his defense, according to a court document in Boston Municipal Court that labels him indigent. Markoff graduated from the State University of New York-Albany and was a second-year medical student at BU.\nHypothesis: The medical student Philip Markoff was engaged.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "17", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-0.7", + "p value": "0.758", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "90", + "Fraction of T in Greenlist": "45.2%", + "z-score": "6.59", + "p value": "2.21e-11", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.4398, 2.3094, 2.5627, 2.8098, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.2026, 3.4101, 3.6141, 3.5032, 3.3947,\n 3.5942, 3.4873, 3.6831, 3.5777, 3.4743, 3.3729, 3.2733, 3.4641,\n 3.3657, 3.2691, 3.1741, 3.0806, 3.2667, 3.4503, 3.6315, 3.8103,\n 3.7166, 3.6242, 3.5333, 3.7087, 3.8819, 4.0531, 4.2222, 4.3894,\n 4.2981, 4.2080, 4.1192, 4.2836, 4.1957, 4.3580, 4.5186, 4.6775,\n 4.8347, 4.7469, 4.6603, 4.5747, 4.4901, 4.4066, 4.5611, 4.7140,\n 4.8655, 5.0156, 4.9322, 4.8497, 4.7682, 4.6876, 4.8355, 4.9820,\n 5.1273, 5.0469, 4.9675, 4.8889, 5.0323, 4.9543, 4.8772, 4.8008,\n 4.7252, 4.8666, 5.0070, 4.9317, 4.8572, 4.9960, 4.9221, 5.0596,\n 4.9862, 4.9135, 4.8414, 4.7700, 4.9058, 4.8348, 4.7645, 4.8990,\n 5.0325, 5.1650, 5.0948, 5.0252, 5.1564, 5.0873, 5.0187, 4.9507,\n 4.8833, 5.0130, 5.1419, 5.2699, 5.3970, 5.3295, 5.4557, 5.3886,\n 5.3220, 5.2560, 5.1905, 5.3153, 5.2501, 5.3740, 5.4971, 5.6195,\n 5.5544, 5.4899, 5.4257, 5.3621, 5.4832, 5.6036, 5.7234, 5.8424,\n 5.7787, 5.8969, 5.8336, 5.7707, 5.7082, 5.6462, 5.7633, 5.7016,\n 5.8179, 5.9336, 6.0487, 5.9871, 5.9258, 5.8650, 5.8046, 5.9186,\n 6.0321, 6.1449, 6.2572, 6.1968, 6.3084, 6.2482, 6.1884, 6.1290,\n 6.0700, 6.1807, 6.1219, 6.2319, 6.3414, 6.4504, 6.3917, 6.3333,\n 6.2753, 6.2177, 6.3258, 6.4333, 6.5404, 6.6469, 6.5893])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The west has preferred to focus on endangered animals, rather than endangered humans. African elephants are hunted down and stripped of tusks and hidden by poachers. Their numbers in Africa slumped from 1.2m to 600,000 in a decade until CITES - the Convention on International Trade in Endangered Species - banned the trade in ivory.\nHypothesis: African elephants are endangered by ivory poachers.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.8728, 1.7685, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275, 1.7321,\n 1.9335, 2.1320, 2.3276, 2.2404, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.3842, 2.3028, 2.2226, 2.4034, 2.3238,\n 2.5019, 2.6778, 2.5983, 2.5198, 2.6928, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.3126, 2.2393, 2.1669, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.2862, 2.2162, 2.1470, 2.0785, 2.2405, 2.1723, 2.1049, 2.0381,\n 1.9720, 1.9066, 1.8419, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.6547, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.8664, 1.8091, 1.7522, 1.6958, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.4201, 1.5614, 1.7018, 1.8411, 1.7864, 1.9245,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.6554, 1.7913, 1.9263, 1.8732,\n 1.8204, 1.9540, 1.9013, 2.0339, 2.1656, 2.1128, 2.0604, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.9599,\n 1.9097, 1.8598, 1.9868, 1.9370, 2.0631, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.8453, 1.7974, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.7310, 1.8527, 1.9738, 1.9267, 1.8799, 2.0000,\n 2.1195, 2.0726, 2.1913, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 4.7237, 4.5547, 4.7819, 5.0037, 4.8407, 5.0576, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.0779, 4.9373, 5.1371, 5.0000,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.4909, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.7242, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.6944, 6.5970, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.3901, 7.5258, 7.6603, 7.7937, 7.6995,\n 7.8318, 7.9630, 8.0931, 8.0000, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.8958, 8.8108, 8.7267, 8.8443, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.7590, 9.6757, 9.7869, 9.8975, 10.0074,\n 9.9249, 9.8431, 9.9524, 9.8712, 9.9800, 10.0881, 10.0076, 9.9278,\n 10.0353, 9.9562, 10.0631, 9.9846, 9.9067, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 10.0987, 10.0231, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.7671, 10.8673, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.0165, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.0194, 11.1164, 11.0450, 11.1415, 11.2376, 11.1667,\n 11.2624, 11.1919, 11.2872, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The extension of the effective period of marketing exclusivity for drugs is designed to give pharmaceutical companies a fair return.\nHypothesis: Prospective drugs must have long market life after regulatory approval in order to be developed.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -1.8204, -1.9052, -1.9868, -2.0656, -2.1418, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.4371, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -3.0867, -3.1300, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -2.9161, -2.9593, -3.0022, -3.0448, -2.8583, -2.9013, -2.9439,\n -2.9862, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.0827, -2.9109, -2.9515,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.7775, -2.8177, -2.8577,\n -2.8975, -2.9369, -2.7744, -2.8141, -2.8536, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.6934, -2.7325, -2.7714, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.9971,\n -3.0339, -3.0706, -3.1071, -3.1433, -3.1794, -3.2152, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.4140, -3.4478, -3.4816,\n -3.5151, -3.3754, -3.4091, -3.4427, -3.4760, -3.3381, -3.3716, -3.4050,\n -3.4383, -3.4713, -3.5043, -3.5370, -3.5697, -3.6021, -3.6345, -3.6667,\n -3.6987, -3.5648, -3.4316, -3.4641, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868, 2.6605, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998, 3.1177, 2.9439, 2.7778,\n 3.0551, 2.8947, 3.1623, 3.0072, 2.8577, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.5627, 2.4351, 2.6811, 2.5560, 2.7952, 2.6726, 2.9055, 3.1334, 3.0123,\n 3.2348, 3.4528, 3.6667, 3.5466, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426,\n 4.4374, 4.6291, 4.5118, 4.3970, 4.2844, 4.1740, 4.3614, 4.2528, 4.4371,\n 4.6188, 4.5115, 4.6904, 4.5847, 4.7610, 4.9348, 5.1065, 5.0019, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058, 4.8113,\n 4.9731, 4.8797, 5.0395, 5.1977, 5.3541, 5.2614, 5.4160, 5.5691, 5.7207,\n 5.6286, 5.7785, 5.9270, 5.8358, 5.9827, 6.1283, 6.0380, 5.9488, 5.8606,\n 5.7735, 5.9172, 5.8310, 5.7457, 5.8878, 6.0288, 6.1685, 6.0838, 6.0000,\n 6.1383, 6.0553, 5.9732, 5.8919, 5.8114, 5.9481, 6.0837, 6.2183, 6.3517,\n 6.4842, 6.6157, 6.7462, 6.8757, 6.7952, 6.7155, 6.8439, 6.7648, 6.8922,\n 7.0187, 7.1443, 7.0658, 6.9879, 7.1125, 7.2363, 7.1590, 7.2818, 7.4039,\n 7.3271, 7.2510, 7.3721, 7.4924, 7.4168, 7.3419, 7.4613, 7.5800, 7.5056,\n 7.6235, 7.5495, 7.6667, 7.5933, 7.5204, 7.6368, 7.7524, 7.8673, 7.9816,\n 7.9091, 8.0227, 8.1356, 8.2479, 8.1758, 8.1043, 8.0333, 8.1448, 8.2557,\n 8.3660, 8.4757, 8.5848, 8.5141, 8.6226, 8.7305, 8.6603, 8.7676, 8.8744,\n 8.8045, 8.7351, 8.8413, 8.7724, 8.8780, 8.9830, 9.0876, 9.1916, 9.2952,\n 9.2265, 9.1584, 9.0906, 9.0233, 8.9565, 8.8900, 8.8240, 8.9268, 9.0292,\n 8.9635, 8.8982, 9.0000, 9.1013, 9.2022, 9.3026, 9.4026, 9.3375, 9.2729,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Hepburn's family will receive the proceeds from the sale.\nHypothesis: Proceeds go to Hepburn's family.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.4021, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.6083, 0.5505, 0.7124, 0.6547,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 1.0370, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.1183, 1.2597, 1.2070, 1.1547,\n 1.1028, 1.0512, 1.0000, 0.9492, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.7878, 0.9165, 1.0445,\n 0.9981, 0.9520, 1.0788, 1.0328, 0.9870, 0.9415, 1.0670, 1.0215,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.7816, 0.7385, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "155", + "Fraction of T in Greenlist": "77.9%", + "z-score": "17.2", + "p value": "7.86e-67", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 7.3113, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.4857, 8.6424, 8.7970, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.5010, 8.6522, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.5830, 10.7084, 10.8328, 10.9560, 11.0782,\n 11.1994, 11.3196, 11.4388, 11.5570, 11.6743, 11.7907, 11.6693, 11.5494,\n 11.6656, 11.7809, 11.6632, 11.5470, 11.6620, 11.5476, 11.6620, 11.7757,\n 11.8885, 11.7762, 11.8885, 12.0000, 12.1107, 12.2207, 12.3299, 12.2202,\n 12.3289, 12.4370, 12.5443, 12.6509, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.2768, 13.1707, 13.2730, 13.3747, 13.4758, 13.5764, 13.6763,\n 13.7757, 13.8745, 13.9728, 14.0705, 14.1677, 14.0649, 13.9630, 14.0601,\n 14.1567, 14.0561, 13.9565, 14.0530, 13.9544, 14.0505, 14.1462, 14.2413,\n 14.1442, 14.2390, 14.3333, 14.4272, 14.5206, 14.6135, 14.5181, 14.6107,\n 14.7029, 14.7947, 14.8860, 14.9769, 15.0674, 15.1574, 15.2470, 15.3362,\n 15.4250, 15.3320, 15.4206, 15.5087, 15.5965, 15.6839, 15.7709, 15.8575,\n 15.9437, 16.0296, 16.1151, 16.2003, 16.1095, 16.0194, 16.1045, 16.1892,\n 16.1000, 16.0115, 16.0961, 16.0083, 16.0928, 16.1769, 16.2607, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.5884, 16.5028, 16.5849, 16.6667,\n 16.7481, 16.8292, 16.9101, 16.9906, 17.0708, 17.1507, 17.2304])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Democrat members of the Ways and Means Committee, where tax bills are written and advanced, do not have strong small business voting records.\nHypothesis: Democrat members had strong small business voting records.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.5505, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.7201, 0.8709, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.9366,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.8755, 1.0105, 1.1447, 1.0954,\n 1.0465, 0.9979, 0.9497, 0.9017, 0.8540, 0.8066, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.8805, 0.8340, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 0.9062, 1.0328, 0.9870, 0.9415, 1.0670, 1.0215,\n 0.9763, 0.9313, 0.8866, 1.0106, 1.1339, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.2848, 4.1633, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.6571, 4.5461, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.9333, 6.0928, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.2816, 8.4138, 8.3138, 8.4449, 8.5749, 8.4763, 8.3789,\n 8.5079, 8.4116, 8.3164, 8.2222, 8.3503, 8.4774, 8.6035, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 9.1273, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.0309, 10.9473, 11.0517,\n 11.1556, 11.2589, 11.1761, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.4411, 11.3610, 11.4614, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.7000, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.5657, 12.4880, 12.5820, 12.5049, 12.4283,\n 12.5221, 12.6153, 12.7082, 12.6323, 12.7248, 12.8169, 12.7416, 12.8333,\n 12.9247, 13.0157, 12.9410, 12.8667, 12.7928, 12.8836, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The official visit of the Argentine minister marks a further step in the normalisation of UK-Argentine relations.\nHypothesis: Relations between Argentina and Great Britain are growing more cooperative.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 1.0719, 1.0070, 0.9428,\n 1.1138, 1.2831, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.7570, 0.6999, 0.8577, 0.8006, 0.7441, 0.6880, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.8447, 0.7921,\n 0.7399, 0.8847, 1.0284, 1.1711, 1.1183, 1.2597, 1.4001, 1.5396,\n 1.6781, 1.6246, 1.7619, 1.7085, 1.6554, 1.7913, 1.9263, 1.8732,\n 1.8204, 1.9540, 2.0868, 2.2188, 2.3500, 2.2966, 2.2436, 2.3735,\n 2.3206, 2.2680, 2.3967, 2.5247, 2.6519, 2.5990, 2.5466, 2.6726,\n 2.7979, 2.9225, 3.0464, 3.1696, 3.2921, 3.4140, 3.5351, 3.4816,\n 3.6019, 3.5485, 3.4954, 3.6148, 3.7335, 3.6805, 3.6277, 3.5753,\n 3.6929, 3.6407, 3.5887, 3.5370, 3.6537, 3.7697, 3.7180, 3.6667,\n 3.6156, 3.5648, 3.5143, 3.4641, 3.4142, 3.3645, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.0370, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.7009, 3.5753, 3.4528, 3.6667,\n 3.8765, 4.0825, 3.9614, 4.1633, 4.0446, 4.2426, 4.4374, 4.6291,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 6.8034, 6.7006, 6.8483, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 8.0483, 7.9495, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.2372, 8.3625, 8.4868, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.3024, 9.2159, 9.3320, 9.2463, 9.3617, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.5112, 9.6225,\n 9.7331, 9.8431, 9.9524, 10.0611, 9.9800, 10.0881, 10.1955, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.5393, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.7423, 10.6650, 10.7671, 10.6904,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.2164, 11.1410,\n 11.2396, 11.3378, 11.2630, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.0000,\n 11.9273, 11.8551, 11.7833, 11.8769, 11.9701, 11.8988, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: It is hoped that women, who constitute more than half of the population, will vote for other women and ensure that their issues are represented in parliament.\nHypothesis: Women are poorly represented in parliament.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.2408, 0.1796, 0.3573, 0.5331, 0.7071,\n 0.8793, 1.0498, 0.9864, 0.9238, 1.0915, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 1.1896, 1.3460, 1.2865, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.4517, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.7792, 1.9242, 2.0682, 2.0101, 1.9524, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.6710, 1.8119, 1.9518, 2.0907, 2.0349, 1.9795, 2.1170,\n 2.0617, 2.0068, 1.9524, 2.0881, 2.0338, 2.1685, 2.3022, 2.2478,\n 2.3805, 2.3262, 2.4578, 2.4037, 2.3500, 2.2966, 2.2436, 2.1909,\n 2.3206, 2.4495, 2.3967, 2.3443, 2.2923, 2.4198, 2.3679, 2.3163,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.2387, 2.1884, 2.1385, 2.2630,\n 2.3868, 2.5099, 2.4597, 2.4099, 2.5319, 2.4822, 2.4327, 2.3835,\n 2.5044, 2.4553, 2.5754, 2.6949, 2.6458, 2.7644, 2.7154, 2.8333,\n 2.7844, 2.7358, 2.6874, 2.6393, 2.5915, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.4536, 7.6140, 7.7723, 7.9286, 7.7942,\n 7.6624, 7.8174, 7.6883, 7.5615, 7.7150, 7.5907, 7.7426, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.8296, 7.7139, 7.8598, 7.7460,\n 7.8905, 7.7784, 7.6681, 7.8113, 7.7026, 7.8444, 7.7373, 7.8779,\n 7.7723, 7.9115, 8.0495, 7.9455, 7.8428, 7.9796, 8.1152, 8.2496,\n 8.1483, 8.0483, 8.1816, 8.0829, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.3326, 9.2435, 9.3611, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.2463, 9.3617, 9.4763, 9.5902, 9.7034,\n 9.8159, 9.9278, 9.8430, 9.7590, 9.6757, 9.7869, 9.7044, 9.8150,\n 9.7331, 9.6519, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 9.9278,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.2790, 10.3827, 10.4858, 10.5884, 10.6904,\n 10.6144, 10.5388, 10.6404, 10.7415, 10.8421, 10.9422, 11.0418, 11.1410,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.9792, 12.0731, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: This growth proved short-lived, for a Swedish invasion ( 1655-56 ) devastated the flourishing city of Warsaw.\nHypothesis: Warsaw was invaded by the Swedes in 1655, and the city was devastated.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.3482, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.7581, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.3453, 0.2949, 0.2449, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.0000, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, 0.0000,\n -0.0455, 0.0907, 0.0452, 0.0000, 0.1348, 0.2689, 0.2234, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "154", + "Fraction of T in Greenlist": "77.4%", + "z-score": "17.1", + "p value": "1.31e-65", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 7.6615, 7.4838, 7.6594, 7.8320, 7.6613, 7.4952, 7.6667,\n 7.8355, 8.0017, 8.1654, 8.3267, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.7943, 8.9456, 9.0949, 9.2424, 9.0924, 9.2388, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 10.2093, 10.0701,\n 9.9333, 10.0673, 10.2000, 10.3314, 10.4614, 10.5903, 10.4581, 10.5862,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.3196, 11.1967, 11.3163, 11.4349, 11.5525, 11.6693, 11.7851,\n 11.6656, 11.7809, 11.6632, 11.7779, 11.8918, 12.0049, 12.1171, 12.2286,\n 12.3393, 12.4491, 12.5583, 12.4444, 12.3319, 12.4409, 12.5491, 12.6566,\n 12.7634, 12.8696, 12.7597, 12.8653, 12.7569, 12.8622, 12.9668, 13.0707,\n 13.1741, 13.2768, 13.3789, 13.4804, 13.5813, 13.4758, 13.3714, 13.4722,\n 13.5724, 13.4694, 13.3675, 13.4675, 13.5670, 13.6659, 13.7642, 13.8621,\n 13.7621, 13.8595, 13.9565, 14.0530, 14.1489, 14.2443, 14.3393, 14.4338,\n 14.3360, 14.4301, 14.3333, 14.4272, 14.5206, 14.6135, 14.7060, 14.7981,\n 14.8896, 14.9808, 15.0715, 14.9769, 14.8831, 14.9737, 15.0639, 15.1537,\n 15.2430, 15.3320, 15.2397, 15.3284, 15.2369, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.6754, 15.7619, 15.8481, 15.9339, 15.8443, 15.7553, 15.8411,\n 15.9264, 16.0115, 16.0961, 16.1805, 16.0928, 16.1769, 16.0900, 16.1739,\n 16.2574, 16.3407, 16.4236, 16.5062, 16.5884, 16.6704, 16.7520, 16.6667,\n 16.5819, 16.6634, 16.7447, 16.8256, 16.9063, 16.9866, 17.0667])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The job gains mean that President Bush can celebrate - albeit by a very fine margin - a net growth in jobs in the US economy in his first term in office.\nHypothesis: More jobs were created during President Bush's first term.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.8889, 1.7457, 2.0370, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.6681, 2.5538, 2.4422, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.2418, 2.1412, 2.0428, 1.9462, 1.8516,\n 1.7589, 1.9711, 1.8791, 1.7889, 1.7002, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.8245, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 2.0656,\n 1.9887, 1.9127, 1.8378, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.8257, 1.7552, 1.6854, 1.6164, 1.7865, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.2247,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.5519, 1.6958, 1.6398, 1.5842,\n 1.5291, 1.4744, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.4662, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.2623, 1.2136, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.3284, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.1587, 1.1127, 1.0670, 1.0215,\n 0.9763, 0.9313, 1.0555, 1.1790, 1.1339, 1.0890, 1.2115, 1.3333,\n 1.4546, 1.4093, 1.3644, 1.3197, 1.2752, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.7357, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.1471, 6.0474, 5.9491, 5.8522, 6.0041, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.0662, 7.2029, 7.1110, 7.0201,\n 7.1556, 7.2900, 7.2001, 7.1111, 7.2443, 7.3765, 7.5076, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.4193, 7.5472, 7.6742, 7.5895,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.7152, 7.8384, 7.9608,\n 7.8791, 7.7981, 7.9196, 7.8393, 7.7597, 7.8803, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.2760, 8.3927, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.3906, 8.5052, 8.6190, 8.5424, 8.6556, 8.5796, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.8752, 8.8008, 8.9107, 9.0200, 8.9461,\n 9.0548, 8.9815, 9.0895, 9.0167, 9.1242, 9.0518, 9.1587, 9.2651,\n 9.1932, 9.1218, 9.2276, 9.3328, 9.2619, 9.3665, 9.2961, 9.4002,\n 9.3302, 9.2607, 9.1916, 9.2952, 9.2265, 9.3295, 9.4320, 9.3638,\n 9.4658, 9.3980, 9.3306, 9.2637, 9.1971, 9.1310, 9.2324, 9.3333,\n 9.2676, 9.3680, 9.4680, 9.5675, 9.5021, 9.6011, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Federal Bureau of Investigation started an independent probe of the circumstances shortly after the White House made plain that President Bill Clinton considered industrial espionage a particular threat to US economic interests.\nHypothesis: A US probe of the investigation started at the instigation of the Commerce Department.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.4237, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.7408, 1.6353, 1.8728, 1.7685, 1.6667,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.4804, 2.6833, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 1.9604, 2.1546, 2.3462, 2.5352, 2.7217,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.4841, 2.6632, 2.8402,\n 2.7585, 2.6778, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.4618,\n 2.3868, 2.5560, 2.4814, 2.6485, 2.8138, 2.9775, 3.1394, 3.2998,\n 3.4586, 3.6159, 3.5396, 3.4641, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.0989, 3.0282, 3.1814, 3.3333, 3.4839, 3.4130, 3.3428, 3.2733,\n 3.2044, 3.1363, 3.0688, 3.0019, 2.9357, 2.8701, 3.0168, 2.9515,\n 3.0967, 3.0317, 2.9673, 2.9035, 3.0467, 2.9832, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.8753, 2.8141, 2.7534, 2.8928, 3.0311, 2.9704,\n 2.9103, 2.8505, 2.7913, 2.7325, 2.6742, 2.6163, 2.5589, 2.6943,\n 2.6370, 2.5802, 2.7143, 2.6576, 2.6014, 2.5456, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.3206, 2.2680, 2.2159, 2.3443, 2.4721, 2.4198, 2.3679, 2.3163,\n 2.2650, 2.2140, 2.1634, 2.1131, 2.0631, 2.0134, 1.9640, 1.9149,\n 2.0396, 1.9906, 1.9419, 1.8935, 2.0170, 1.9686, 1.9206, 1.8728,\n 1.8252, 1.7780, 1.8999, 1.8527, 1.8058, 1.7592, 1.7128, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.0186, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 5.7646, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.4685, 9.5939, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.5556, 9.6775, 9.7986, 9.9187, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 10.0984, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.9637, 10.8729, 10.9816, 10.8916,\n 10.8025, 10.9107, 10.8224, 10.9301, 11.0371, 11.1435, 11.2493, 11.1621,\n 11.2674, 11.1810, 11.0952, 11.2001, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.4450, 11.5471, 11.4638, 11.3812, 11.4829, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.7405, 11.6606, 11.7595,\n 11.6802, 11.7787, 11.8766, 11.9741, 12.0712, 11.9927, 12.0893, 12.0114,\n 11.9340, 12.0302, 11.9534, 12.0493, 11.9730, 12.0685, 12.1635, 12.0878,\n 12.1825, 12.2767, 12.2016, 12.2954, 12.3888, 12.3143, 12.4074, 12.5000,\n 12.4260, 12.5183, 12.6102, 12.7017, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Researchers at the Harvard School of Public Health say that people who drink coffee may be doing a lot more than keeping themselves awake - this kind of consumption apparently also can help reduce the risk of diseases.\nHypothesis: Coffee drinking has health benefits.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.0793, 0.1571, 0.3892, 0.6172,\n 0.5353, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.8667, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.4103, 0.5832, 0.7543, 0.9238, 0.8617, 1.0290, 0.9671, 0.9058,\n 1.0705, 1.0094, 0.9488, 1.1111, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 0.9567, 1.1114, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.5283, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.3800, 0.5053, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "25", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "48.0%", + "z-score": "2.66", + "p value": "0.00396", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: A top aide to Ms. Bhutto, Senator Latif Khosa, says that on the day of her assassination, Ms. Bhutto planned to give two visiting U.S. lawmakers a thick dossier outlining numerous instances of government pre-poll rigging involving voter registration as well as intimidation of PPP supporters. \"Everything was recorded in that, with all proofs that we have had, and with the independent reports of the foreign monitors who were also operating. Their reports also supported our allegations as to being substantial and as being true. So they were all recorded, but unfortunately she could not present the report because she was assassinated before she could do that,\" said Khosa.\nHypothesis: Latif Khosa was in a relationship with Ms. Bhutto.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.4804, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.4506, -0.4988, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.4376, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.4714, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.4914, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.2540, -0.1267, -0.1684, -0.2100, -0.2513, -0.1253, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 3.8497, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.9097, 7.7723, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.4017, 8.5491, 8.6948, 8.8389, 8.9815,\n 9.1225, 8.9935, 9.1333, 9.2717, 9.4087, 9.5443, 9.4188, 9.2952,\n 9.4301, 9.5637, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 10.0935,\n 9.9754, 9.8590, 9.9863, 10.1124, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.9727, 11.0902, 10.9794, 10.8699,\n 10.9870, 11.1033, 11.2187, 11.3333, 11.4471, 11.5601, 11.6723, 11.7838,\n 11.8944, 12.0044, 12.1136, 12.2221, 12.3299, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.7542, 12.8582, 12.9616, 13.0644, 12.9616, 13.0639,\n 13.1657, 13.2669, 13.1657, 13.2665, 13.3667, 13.2669, 13.3667, 13.4660,\n 13.3674, 13.2698, 13.1730, 13.0771, 12.9820, 13.0815, 13.1806, 13.0866,\n 13.1852, 13.2834, 13.3810, 13.4780, 13.5746, 13.6707, 13.5784, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 13.9650, 13.8756,\n 13.9690, 13.8804, 13.7926, 13.8857, 13.9784, 13.8914, 13.9838, 14.0758,\n 13.9896, 14.0813, 14.1725, 14.0872, 14.1781, 14.2686, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.5426, 14.4591, 14.5479, 14.6362, 14.5535, 14.6416,\n 14.7293, 14.6473, 14.7348, 14.8219, 14.7406, 14.8274, 14.9139, 14.8333,\n 14.9195, 15.0054, 14.9255, 15.0111, 15.0964, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: South America - The President of Colombia Alvaro Uribe is scheduled to meet the President of Venezuela Hugo Ch\u00e1vez Thursday. Apparently the crisis between Venezuela and Colombia is almost solved. The crisis began with the imprisonment of the alleged FARC member Rodrigo Granda by Colombian forces on December 13, 2004. Venezuela accused Colombian of invading Venezuelan territory. Colombia accused Venezuela of harboring FARC terrorists. The President of Cuba, Fidel Castro, intervened in the crisis and talked to Chavez and Uribe.\nHypothesis: Alvaro Uribe is the current President of Colombia.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.5298, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.3365, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.6768, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.5808, 0.7127,\n 0.8438, 0.9742, 1.1038, 1.0565, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.9520, 1.0788, 1.0328, 0.9870, 0.9415, 1.0670, 1.1918,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.3019, 1.2566, 1.2115, 1.1667,\n 1.1221, 1.2435, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.4222, 5.2778, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.5593, 7.7026, 7.8444, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.3411, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.9370, 12.0433,\n 12.1491, 12.0516, 12.1568, 12.2615, 12.3655, 12.4689, 12.3729, 12.4759,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.5930, 12.6939, 12.7943, 12.8942,\n 12.9935, 12.9011, 12.8095, 12.9085, 13.0071, 13.1050, 13.2025, 13.1122,\n 13.2093, 13.3059, 13.4021, 13.4977, 13.5929, 13.6876, 13.5987, 13.6931,\n 13.7870, 13.8804, 13.9735, 14.0660, 14.1582, 14.2499, 14.1625, 14.2539,\n 14.3449, 14.2584, 14.3491, 14.2633, 14.3537, 14.4437, 14.5333, 14.6225,\n 14.5378, 14.6267, 14.7152, 14.8034, 14.8912, 14.9786, 15.0657, 15.1524,\n 15.0689, 14.9860, 15.0726, 15.1587, 15.0766, 15.1625, 15.0810, 15.1667,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The town is also home to the Dalai Lama and to more than 10,000 Tibetans living in exile.\nHypothesis: The Dalai Lama has been living in exile since 10,000.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.8729, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.4434,\n 1.6471, 1.5635, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.6667, 1.5894, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.6577, 1.8378, 1.7638, 1.6908, 1.8677, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.9262, 1.8559, 1.7865, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.8475, 2.0107, 1.9437, 1.8773, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 2.0918, 2.0276, 1.9640,\n 2.1182, 2.0548, 1.9920, 1.9298, 1.8682, 2.0197, 2.1700, 2.1082,\n 2.0470, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.1858, 2.1268, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 2.1783,\n 2.3183, 2.2608, 2.2037, 2.3422, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.3891, 2.3333, 2.2780, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.3262, 2.4578, 2.4037, 2.3500, 2.4803, 2.6099, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.4721, 2.4198, 2.5466, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.4653, 2.5898, 2.5386, 2.4877, 2.6112,\n 2.7340, 2.6830, 2.6323, 2.5820, 2.5319, 2.6534, 2.6034, 2.5538,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.4778, 2.5969, 2.7154, 2.6667,\n 2.6182, 2.7358, 2.8528, 2.8043, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330, 2.1170,\n 2.5560, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321, 2.1004, 2.4495,\n 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284, 2.6558, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 3.0072, 2.8577, 3.1156, 2.9704, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.4207, 3.6515, 3.5165, 3.7417, 3.6098, 3.8297, 4.0451,\n 4.2563, 4.4634, 4.6667, 4.8662, 5.0623, 4.9316, 4.8038, 4.9962, 4.8712,\n 5.0602, 5.2463, 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 6.0125, 6.1828,\n 6.3509, 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.7769, 6.6679, 6.5607, 6.7132, 6.6075,\n 6.7583, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.8931, 6.7931, 6.9378,\n 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068, 7.2104, 7.1152, 7.0211,\n 7.1591, 7.2960, 7.4316, 7.3386, 7.2466, 7.3810, 7.2900, 7.2001, 7.3333,\n 7.2443, 7.1563, 7.0692, 7.2012, 7.1149, 7.0296, 7.1605, 7.0759, 6.9923,\n 6.9094, 7.0391, 6.9570, 6.8757, 6.7952, 6.9237, 7.0513, 7.1779, 7.3037,\n 7.2236, 7.3485, 7.2691, 7.1904, 7.1125, 7.2363, 7.1590, 7.0823, 7.0063,\n 6.9310, 7.0537, 7.1755, 7.2966, 7.2217, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.2399, 7.1673, 7.2857, 7.2136, 7.3312, 7.2596, 7.1886, 7.1181, 7.2348,\n 7.1647, 7.0952, 7.0262, 6.9577, 7.0735, 7.1885, 7.3030, 7.2348, 7.3485,\n 7.4616, 7.5740, 7.6859, 7.7971, 7.9078, 8.0178, 8.1273, 8.2362, 8.3446,\n 8.2762, 8.2084, 8.3161, 8.4232, 8.3557, 8.4623, 8.3952, 8.5012, 8.6066,\n 8.7116, 8.8160, 8.9199, 9.0233, 8.9565, 8.8900, 8.9929, 8.9268, 8.8612,\n 8.7959, 8.8982, 8.8333, 8.7689, 8.7048, 8.8065, 8.9077, 9.0085, 9.1088,\n 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Adela Lupse was born in 1988 to a miner's family in the village of Poiana, Bihor in Transylvania, Romania. A member of Romania's first post-Ceausescu generation, she grasped the power of television at a young age: \"I think television is the most interesting phenomenon that the modern day has given us....for my type of personality, I find it impressive, the power that television has worldwide....We dress like the people that television promotes. We want to look like the celebrities that television launches.\"\nHypothesis: Bihor is situated in Romania.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.1677, -0.9258,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.0596, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.4194, -1.2780,\n -1.1375, -0.9979, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.7377, -0.7789, -0.8199, -0.6885, -0.5579, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.4145, -0.4548, -0.3299, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.8257,\n 1.7132, 1.6036, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.5568, 2.7761, 2.6679, 2.8823, 2.7757, 2.6713, 2.5690, 2.4689,\n 2.6765, 2.8808, 3.0817, 3.2796, 3.4743, 3.6662, 3.8552, 3.7528,\n 3.9386, 4.1219, 4.0205, 3.9208, 4.1008, 4.0024, 3.9056, 4.0825,\n 3.9869, 4.1612, 4.3333, 4.2385, 4.4083, 4.5760, 4.7419, 4.6476,\n 4.8113, 4.7181, 4.6262, 4.5356, 4.4462, 4.6070, 4.7662, 4.9237,\n 5.0795, 5.2338, 5.3865, 5.5377, 5.4480, 5.5976, 5.7458, 5.6569,\n 5.5690, 5.7155, 5.6285, 5.5426, 5.6874, 5.6023, 5.7457, 5.8878,\n 5.8034, 5.9442, 6.0838, 6.2222, 6.1383, 6.2755, 6.1924, 6.1101,\n 6.0287, 5.9481, 6.0837, 6.2183, 6.3517, 6.4842, 6.6157, 6.7462,\n 6.8757, 6.7952, 6.9237, 7.0513, 6.9714, 6.8922, 7.0187, 6.9402,\n 6.8624, 6.9879, 6.9107, 7.0353, 7.1590, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.3721, 7.4924, 7.4168, 7.3419, 7.2675, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.8988, 8.0139, 7.9403, 8.0546,\n 8.1683, 8.0952, 8.0227, 8.1356, 8.0636, 7.9921, 8.1043, 8.0333,\n 8.1448, 8.2557, 8.1851, 8.2954, 8.4050, 8.5141, 8.4439, 8.5524,\n 8.4826, 8.4133, 8.3446, 8.2762, 8.3840, 8.4911, 8.5978, 8.7039,\n 8.8094, 8.9145, 9.0190, 8.9509, 9.0549, 9.1584, 9.0906, 9.0233,\n 9.1262, 9.0593, 8.9929, 9.0952, 9.0292, 9.1310, 9.2324, 9.1667,\n 9.2676, 9.3680, 9.4680, 9.4026, 9.5021, 9.4370, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Merrill Lynch & Co. and Smith Barney, now a unit of Citigroup, in 1998 settled discrimination cases involving hundreds of female employees.\nHypothesis: Merrill Lynch & Co. and Smith Barney are now a unit of Citigroup.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.28", + "p value": "0.899", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.3000, -1.3646, -1.4281, -1.1926, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.5300, -1.5823, -1.6341, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -1.8411, -1.8829, -1.7321,\n -1.5822, -1.4335, -1.4762, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.2943, -1.3362, -1.3779, -1.4194, -1.2780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.0370, 2.3190, 2.5924, 2.4495,\n 2.3116, 2.5744, 2.8301, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 3.9620, 3.8297, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.1027, 2.9913, 2.8823, 2.7757, 2.9856, 2.8804, 2.7775,\n 2.9824, 3.1840, 3.0817, 3.2796, 3.1787, 3.3729, 3.5642, 3.4641,\n 3.3657, 3.5533, 3.7383, 3.9208, 4.1008, 4.0024, 4.1797, 4.0825,\n 4.2571, 4.1612, 4.3333, 4.5034, 4.4083, 4.3146, 4.4820, 4.6476,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.4160,\n 5.3243, 5.2338, 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.7140,\n 4.6311, 4.5491, 4.7001, 4.6188, 4.5384, 4.6876, 4.8355, 4.7556,\n 4.9019, 4.8226, 4.9675, 5.1111, 5.0323, 4.9543, 5.0964, 5.2372,\n 5.3769, 5.5155, 5.4377, 5.5750, 5.4977, 5.6338, 5.5572, 5.6921,\n 5.8260, 5.7498, 5.6743, 5.8069, 5.9386, 6.0693, 6.1990, 6.3278,\n 6.4558, 6.3803, 6.5072, 6.6332, 6.5582, 6.4838, 6.4101, 6.3369,\n 6.2644, 6.1926, 6.1213, 6.0506, 5.9805, 5.9109, 5.8419, 5.9660,\n 5.8974, 5.8294, 5.9524, 6.0746, 6.0069, 6.1283, 6.0609, 6.1815,\n 6.3013, 6.2342, 6.1677, 6.2866, 6.4048, 6.5223, 6.6391, 6.5727,\n 6.6887, 6.6227, 6.7380, 6.6724, 6.7869, 6.9009, 6.8355, 6.7706,\n 6.8838, 6.9964, 7.1083, 7.2197, 7.3305, 7.4407, 7.3758, 7.4853,\n 7.5944, 7.5297, 7.4655, 7.4017, 7.3383, 7.2753, 7.2127, 7.1506,\n 7.0888, 7.0273, 6.9663, 6.9056, 7.0133, 6.9530, 6.8930, 7.0000,\n 7.1065, 7.0467, 7.1527, 7.0932, 7.1985, 7.3034, 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: La Paz, 30 May 89 - La Paz Department Police authorities have disclosed that investigations into the murder of two young U.S. citizens are being conducted by a specialized group summoned specially to clarify this crime.\nHypothesis: Two young U.S. citizens were killed on 30 May 89.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.4281, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.7913, -1.6444, -1.6859,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.7049, -1.7454, -1.7857, -1.6432,\n -1.6836, -1.7237, -1.5828, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.5916, -1.6292, -1.6667,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 5.6921, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 6.2251, 6.0849, 6.2651, 6.1283, 6.3058, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.5607, 6.7132,\n 6.6075, 6.5033, 6.4006, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.0741,\n 7.9839, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.3093, 8.4327,\n 8.3453, 8.2588, 8.3813, 8.2956, 8.2107, 8.1266, 8.0434, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.7742, 8.6963, 8.6190, 8.7323, 8.8448, 8.9567, 8.8800, 8.9912,\n 8.9151, 8.8396, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.1242, 9.2311, 9.1587, 9.0869,\n 9.0155, 8.9447, 9.0510, 9.1567, 9.2619, 9.1915, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.3642, 9.4673, 9.3982, 9.3295, 9.2613, 9.1936,\n 9.2960, 9.3980, 9.4995, 9.4321, 9.5331, 9.6336, 9.7337, 9.8333,\n 9.7663, 9.8654, 9.9641, 10.0624, 9.9957, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: I asked myself how to understand the 'Twilight's success. And I have come to the idea that when you really believe in something, sooner or later it will become real. So, I guess this is what happened with Robert Pattinson. Last year he was just an unknown actor who's biggest role was in a pair of \"Harry Potter\" movies. Now, not only that Twilight is competing with 'Harry Potter', but Robert Pattinson is one of the most famous young actors who sucked $373.4 million from global box offices. So the movie about a vampire boy who falls in love with a normal girl, begun a real hysteria. And Pattinson has a lot to do with it!.\nHypothesis: Robert Pattinson is a vampire.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.2692, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.4082,\n 0.3558, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472, 0.5941,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.5203, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.6351, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.4885, 0.6199, 0.5740, 0.5283, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.2940, 0.2513, 0.3760, 0.5000,\n 0.6234, 0.7461, 0.7029, 0.6598, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.1822, 2.4659, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.0509, 2.9212,\n 3.1558, 3.0290, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.3709, 5.2590, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.6986, 5.8522, 5.7566, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.2601, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.6785, 7.5912, 7.7192, 7.8463, 7.9724, 7.8859, 8.0111,\n 7.9254, 8.0497, 8.1731, 8.2956, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.8778, 8.7952, 8.9113,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.2867, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.4608, 9.3810, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.9846, 10.0910, 10.0131, 10.1189, 10.2242,\n 10.3289, 10.2516, 10.1749, 10.2790, 10.3827, 10.4858, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.8421, 10.7671, 10.8673, 10.9669,\n 11.0661, 10.9917, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.2366,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.5489, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.6893, 11.6179, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Andreessen, who helped define the Internet revolution as part of team that created the first Internet browser (Mosaic) and his co-founding Netscape, told a packed hall at the San Francisco Marriott hotel Thursday that he is \"extremely committed\" to his startup Loudcloud.\nHypothesis: The Internet browser Mosaic was created at the San Francisco Marriott hotel.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -0.9428,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.4004, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.7644, -2.7990, -2.8333,\n -2.8675, -2.9016, -2.7701, -2.8043, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.9913, 3.2026, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.3729, 3.5642, 3.4641,\n 3.6522, 3.8376, 4.0205, 4.2008, 4.1008, 4.0024, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.8667, 4.7683, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.4832, 5.6401, 5.7955, 5.6986, 5.6032, 5.5090, 5.4160,\n 5.5691, 5.7207, 5.8707, 6.0193, 5.9270, 5.8358, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.3248, 6.4663, 6.3768, 6.5169, 6.6559, 6.7937,\n 6.9303, 6.8414, 6.9768, 6.8889, 6.8019, 6.9361, 6.8500, 6.7648,\n 6.6804, 6.8133, 6.9451, 6.8615, 6.7788, 6.6968, 6.6157, 6.5354,\n 6.6658, 6.5861, 6.7155, 6.8439, 6.9714, 7.0980, 7.0187, 7.1443,\n 7.0658, 7.1904, 7.3143, 7.4373, 7.3592, 7.2818, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.4924, 7.6120, 7.5364, 7.6551, 7.5800, 7.5056,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673,\n 7.9816, 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.8364, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.5089, 9.4375, 9.5416, 9.4707, 9.5743,\n 9.5038, 9.4338, 9.5369, 9.6394, 9.7415, 9.6719, 9.6028, 9.7043,\n 9.6356, 9.5673, 9.4995, 9.6005, 9.7011, 9.8012, 9.7337, 9.8333,\n 9.9325, 9.8654, 9.9641, 10.0624, 10.1602, 10.2576, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Supreme Court said today states may bar the removal of life-sustaining treatment from comatose patients who have not made or cannot make their desires known.\nHypothesis: There is a Supreme Court decision about the removal of life-support.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.3333, 0.5298, 0.7237, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.8047, 0.7385,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.9238, 1.0915, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.9316, 0.8729,\n 0.8147, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 0.8997, 0.8433,\n 0.9972, 0.9409, 0.8850, 1.0370, 1.1877, 1.3373, 1.4857, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.4335, 1.3810, 1.3288, 1.4662, 1.6028, 1.7384, 1.8732,\n 1.8204, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.5110, 1.4606,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.5731, 1.5236, 1.4743, 1.4254,\n 1.3768, 1.5055, 1.4570, 1.4087, 1.5363, 1.6632, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.6737, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.6378, 1.5916, 1.5457, 1.6667,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.7233, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.3094,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 4.8742, 4.7336, 4.5968, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 4.8038, 4.6790, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.5137, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.2601, 6.4059, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.9768, 6.8889, 6.8019, 6.9361, 6.8500, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.6328, 7.7598, 7.6742, 7.5895,\n 7.7155, 7.8406, 7.7566, 7.6734, 7.5910, 7.5094, 7.4286, 7.5526,\n 7.4724, 7.5955, 7.7178, 7.8393, 7.7597, 7.6808, 7.6026, 7.5251,\n 7.4483, 7.5687, 7.6883, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.6235, 7.7407, 7.8571, 7.9729, 8.0880, 8.0139, 7.9403, 8.0546,\n 7.9816, 7.9091, 8.0227, 7.9507, 8.0636, 8.1758, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.6560, 8.5848, 8.6933, 8.8013, 8.7305,\n 8.6603, 8.5905, 8.5212, 8.4523, 8.5595, 8.4911, 8.5978, 8.7039,\n 8.8094, 8.7414, 8.6738, 8.6066, 8.5399, 8.4736, 8.5785, 8.6828,\n 8.6169, 8.7207, 8.6551, 8.5899, 8.6932, 8.6284, 8.7311, 8.8333,\n 8.9351, 9.0364, 8.9718, 8.9077, 9.0085, 8.9446, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: The Massachusetts Supreme Judicial Court has cleared the way for lesbian and gay couples in the state to marry, ruling that government attorneys \"failed to identify any constitutionally adequate reason\" to deny them the right.\nHypothesis: U.S. Supreme Court in favor of same-sex marriage\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -0.9608, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.0278, -1.0887,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.2521, -1.2943, -1.3362, -1.1942, -1.0531, -1.0954,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -1.1651, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.2585, -1.2982, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.2950, -1.1667,\n -1.0390, -1.0777, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -0.6667, -0.1601, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.1492, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.6173, 8.5149, 8.4138, 8.5448, 8.4449, 8.5749, 8.7039, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.6667, 8.5715, 8.4774, 8.6035, 8.7287,\n 8.6357, 8.5437, 8.4526, 8.3625, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 8.8631, 8.7773,\n 8.6924, 8.6083, 8.5249, 8.6433, 8.7610, 8.6783, 8.5964, 8.7133,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.7033, 8.6241, 8.7388, 8.8527,\n 8.7742, 8.8874, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 8.9151, 8.8396, 8.7647, 8.6903, 8.6165, 8.5433, 8.4706, 8.3984,\n 8.5088, 8.4371, 8.5469, 8.4757, 8.5848, 8.6933, 8.8013, 8.9087,\n 8.8379, 8.9447, 8.8744, 8.8045, 8.9107, 8.8413, 8.9469, 9.0520,\n 9.1566, 9.2607, 9.3642, 9.4673, 9.3982, 9.5007, 9.6028, 9.5341,\n 9.6356, 9.5673, 9.4995, 9.6005, 9.7011, 9.6336, 9.7337, 9.8333,\n 9.7663, 9.6996, 9.7987, 9.7325, 9.8311, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the premise entails the hypothesis. Answer 'yes' for entailment or 'no' for not entailment:\nPremise: Former WBA heavyweight champ Greg Page, who suffered a severe brain injury in a 2001 fight, has died at his Louisville home at the age of 50. According to Page's wife, the ex-champ died from complications due to boxing injuries and paralysis. Following a successful amateur career, Page went 58-17-1 during a professional career that began in 1979 and included wins over Jimmy Young, James Tillis, Renaldo Snipes, Gerrie Coetzee (for the WBA title), James 'Bonecrusher' Smith and Tim Witherspoon. Page's losses read like a who's who of heavyweights of the 1980s: Trevor Berbick, Witherspoon, Tony Tubbs, Buster Douglas, Joe Bugner, Orlin Norris, Donovan 'Razor' Ruddock, Bruce Seldon, Monte Barrett and Jorge Luis Gonzalez.\nHypothesis: Greg Page was a boxer.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 0.9258,\n 1.1471, 1.0613, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.4000, 1.5894, 1.5133, 1.4382, 1.6239, 1.5492,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.7233,\n 1.8972, 1.8257, 1.9973, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.8773, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.7465, 1.6865,\n 1.8370, 1.7772, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.8371,\n 1.9825, 1.9242, 2.0682, 2.0101, 2.1527, 2.0948, 2.0373, 1.9803,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.8962, 1.8411, 1.9795, 2.1170,\n 2.2535, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 1.9263, 1.8732,\n 2.0071, 1.9540, 2.0868, 2.0339, 2.1656, 2.1128, 2.2436, 2.1909,\n 2.1386, 2.2680, 2.2159, 2.3443, 2.4721, 2.4198, 2.5466, 2.4944,\n 2.6203, 2.5683, 2.5166, 2.4653, 2.4142, 2.3635, 2.3131, 2.2630,\n 2.3868, 2.5099, 2.6323, 2.5820, 2.7036, 2.6534, 2.6034, 2.5538,\n 2.5044, 2.6247, 2.5754, 2.6949, 2.6458, 2.5969, 2.7154, 2.6667,\n 2.6182, 2.7358, 2.6874, 2.6393, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.9279, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.3142, 4.5260, 4.7336, 4.5968, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 5.7735,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.4501, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.7416, 6.8849, 6.7886, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.2532, 7.1591, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.5556, 7.4655, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.4622, 7.5912, 7.5048, 7.6328, 7.5472, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.0403, 7.9600, 8.0798, 8.1989, 8.1192,\n 8.2375, 8.1585, 8.2760, 8.3927, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.5052, 8.4286, 8.5424, 8.6556, 8.5796, 8.6921, 8.6166,\n 8.7284, 8.8396, 8.9502, 8.8752, 8.9851, 8.9107, 9.0200, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.0167, 9.1242, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.3328, 9.4375, 9.3665, 9.2961, 9.4002,\n 9.5038, 9.4338, 9.5369, 9.4673, 9.5698, 9.6719, 9.7735, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.7690, 9.7011, 9.8012, 9.9008, 9.8333,\n 9.9325, 9.8654, 9.9641, 10.0624, 10.1602, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.53, + "accuracy_with_watermark": 0.55, + "f1_without_watermark": 0.5123975516132379, + "f1_with_watermark": 0.4950061721467849 + } + } + }, + "wnli": { + "train": { + "results": [ + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I stuck a pin through a carrot. When I pulled the pin out, it had a hole.\nWith pronoun replaced: The carrot had a hole.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.4140, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.5695, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.5410, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 3.1177, 2.9439, 2.7778, 2.6186, 2.4659, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.8297, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.5850, 4.4721, 4.6571, 4.5461, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.0205, 4.2008, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 5.0017, 4.9058,\n 5.0679, 5.2281, 5.3867, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.2197, 6.1283,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.7536, 6.8889, 7.0231, 7.1563, 7.0692, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.9254, 7.8406, 7.9649, 8.0882, 8.2107, 8.3324, 8.2483, 8.3691,\n 8.4891, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.5964, 8.7133,\n 8.8294, 8.9448, 9.0595, 8.9783, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.2232, 9.3338, 9.2559, 9.3659,\n 9.2885, 9.3979, 9.3212, 9.4299, 9.3537, 9.4619, 9.3863, 9.4939,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.5896, 9.5161, 9.6214,\n 9.5485, 9.6532, 9.5808, 9.6850, 9.6130, 9.7167, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.7095, 9.8116, 9.7415, 9.8431, 9.7735, 9.8746,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 9.9008, 10.0000,\n 9.9325, 10.0312, 9.9641, 10.0624, 9.9957, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: John couldn't see the stage with Billy in front of him because he is so short.\nWith pronoun replaced: John is so short.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "33.3%", + "z-score": "0.577", + "p value": "0.282", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "33.3%", + "z-score": "0.577", + "p value": "0.282", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n 0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The police arrested all of the gang members. They were trying to stop the drug trade in the neighborhood.\nWith pronoun replaced: The police were trying to stop the drug trade in the neighborhood.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.0735, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.2111, -1.2604, -1.3093,\n -1.1406, -0.9733, -1.0229, -1.0721, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.0186, 5.2223, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.4444, 5.6307, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.1490, 5.3211, 5.4909, 5.6585, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.0943, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.9378, 7.0812, 7.2232, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.2104, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.3274, 8.2372, 8.1481, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.4679, 8.5896, 8.7104, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.0453, 9.1615, 9.2768, 9.3915, 9.5054,\n 9.4213, 9.3380, 9.2554, 9.3686, 9.4812, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.8712, 9.9800, 10.0881, 10.0076, 10.1151,\n 10.2220, 10.3284, 10.4341, 10.3544, 10.2753, 10.1968, 10.3020, 10.4067,\n 10.5109, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.7671, 10.8686,\n 10.9697, 11.0702, 10.9936, 11.0937, 11.0177, 10.9422, 11.0418, 11.1410,\n 11.2396, 11.3378, 11.2630, 11.1886, 11.1148, 11.0414, 11.1392, 11.2366,\n 11.3335, 11.2607, 11.1883, 11.2848, 11.3809, 11.4766, 11.5718, 11.6667,\n 11.7611, 11.8551, 11.7833, 11.8769, 11.8056, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Steve follows Fred's example in everything. He influences him hugely.\nWith pronoun replaced: Steve influences him hugely.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.4444, 0.3871, 0.5505, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.3453, 0.2949, 0.2449, 0.3904, 0.3404, 0.2907, 0.2414, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, -0.1391, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.3522, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, -0.0838, -0.1253, 0.0000,\n 0.1247, 0.0829, 0.0413, 0.0000, 0.1234, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.6098, 3.4816, 3.7009, 3.5753, 3.7897, 4.0000,\n 3.8765, 4.0825, 3.9614, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 4.8394, 5.0190, 4.9075,\n 4.7980, 4.9747, 4.8669, 5.0410, 4.9348, 5.1065, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.4610, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.7382, 5.8936, 6.0474, 5.9491, 5.8522, 5.7566, 5.9084,\n 5.8139, 5.7207, 5.8707, 6.0193, 6.1664, 6.0740, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.3248, 6.4663, 6.6066, 6.7456, 6.6559, 6.7937,\n 6.7049, 6.8414, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.6328, 7.5472, 7.6742, 7.8003,\n 7.9254, 8.0497, 7.9649, 8.0882, 8.0042, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.3605, 8.2793, 8.3977, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.5088, 8.6241, 8.7388, 8.8527,\n 8.9660, 8.8874, 8.8095, 8.9221, 9.0340, 9.1452, 9.2559, 9.1785,\n 9.2885, 9.2118, 9.1357, 9.2450, 9.1694, 9.2782, 9.2032, 9.3113,\n 9.4188, 9.5258, 9.4513, 9.5577, 9.4837, 9.4103, 9.5161, 9.6214,\n 9.5485, 9.6532, 9.7574, 9.8611, 9.9642, 10.0668, 9.9944, 9.9224,\n 10.0245, 10.1262, 10.2273, 10.3280, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.3148, 10.2447, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.5286, 10.4603, 10.5573, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.\nWith pronoun replaced: mother was careful not to disturb her, undressing and climbing back into her berth.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.1562, 0.0000, 0.1549, 0.1029, 0.0512, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n 0.1365, 0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.0447, 0.1782,\n 0.1332, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.3841, 0.3405,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.4949, 0.4525, 0.4103, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "195", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "42.6%", + "z-score": "5.66", + "p value": "7.38e-09", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495, 2.1170,\n 2.5560, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 3.4641, 3.7808, 3.5382,\n 3.8411, 4.1312, 3.9056, 4.1851, 3.9727, 3.7712, 4.0415, 3.8497, 3.6667,\n 3.4915, 3.3235, 3.1623, 3.0072, 2.8577, 2.7136, 2.9704, 2.8301, 2.6943,\n 2.5627, 2.4351, 2.6811, 2.5560, 2.4345, 2.3163, 2.2011, 2.0889, 2.3238,\n 2.2133, 2.1054, 2.0000, 1.8970, 1.7963, 1.6977, 1.9215, 1.8240, 1.7285,\n 1.9462, 2.1602, 2.3706, 2.5775, 2.4804, 2.6833, 2.5873, 2.4930, 2.4004,\n 2.5981, 2.5064, 2.4163, 2.6098, 2.5205, 2.4327, 2.6222, 2.5352, 2.7217,\n 2.6354, 2.5504, 2.7333, 2.6491, 2.5660, 2.7456, 2.6632, 2.8402, 3.0151,\n 3.1879, 3.1052, 3.0237, 3.1937, 3.3619, 3.5282, 3.6927, 3.6107, 3.7732,\n 3.6919, 3.8523, 3.7717, 3.6920, 3.8503, 3.7712, 3.9276, 4.0825, 4.2359,\n 4.3879, 4.3086, 4.4590, 4.3804, 4.5291, 4.6765, 4.5983, 4.7442, 4.6667,\n 4.5899, 4.7341, 4.6580, 4.5826, 4.5079, 4.4341, 4.3609, 4.2885, 4.2167,\n 4.1457, 4.2870, 4.2164, 4.1464, 4.0771, 4.0085, 4.1478, 4.0795, 4.0119,\n 3.9448, 3.8784, 3.8125, 3.9497, 3.8841, 3.8191, 3.7547, 3.6908, 3.6274,\n 3.5645, 3.6995, 3.6369, 3.5748, 3.7084, 3.8411, 3.9729, 4.1038, 4.0415,\n 4.1713, 4.1092, 4.0476, 3.9865, 4.1150, 4.2426, 4.1816, 4.1210, 4.0608,\n 4.1872, 4.3128, 4.2527, 4.1931, 4.1338, 4.2582, 4.3818, 4.3226, 4.2639,\n 4.2056, 4.3280, 4.4497, 4.3915, 4.3336, 4.2762, 4.3967, 4.5166, 4.4593,\n 4.5783, 4.6968, 4.6395, 4.5826, 4.7001, 4.8170, 4.9333, 5.0489, 4.9918,\n 5.1068, 5.0499, 5.1642, 5.1075, 5.0513, 5.1647, 5.1086, 5.2213, 5.3335,\n 5.4451, 5.5562, 5.5000, 5.6104, 5.5545, 5.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: George got free tickets to the play, but he gave them to Eric, because he was particularly eager to see it.\nWith pronoun replaced: George was particularly eager to see it.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "152", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "36.2%", + "z-score": "3.18", + "p value": "0.000725", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.7924, 1.0445, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 1.0510, 0.9608, 1.1896, 1.4142, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.5275, 1.7321,\n 1.9335, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.6330,\n 1.8245, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.9887, 1.9127, 2.0913, 2.2678, 2.4423, 2.3658, 2.2902, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.1669, 2.3349, 2.5011, 2.6656, 2.5927,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.4703, 2.4010, 2.5600, 2.7175,\n 2.8735, 2.8039, 2.7349, 2.6667, 2.5991, 2.5322, 2.6852, 2.6186,\n 2.7699, 2.9200, 3.0688, 3.0019, 2.9357, 2.8701, 2.8051, 2.7406,\n 2.8868, 2.8226, 2.9673, 3.1109, 3.2533, 3.1889, 3.1251, 3.0619,\n 2.9991, 2.9369, 3.0770, 3.0151, 2.9537, 3.0923, 3.2299, 3.1685,\n 3.1076, 3.0471, 2.9872, 2.9277, 3.0632, 3.0039, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.0952, 3.0373, 2.9798, 2.9227, 3.0540, 3.1844])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.0000, 4.3235, 4.6291, 4.9193, 5.1962, 4.9010, 5.1711,\n 5.4306, 5.1640, 4.9135, 4.6775, 4.9358, 4.7140, 4.5033, 4.7556, 4.5556,\n 4.8008, 4.6101, 4.8488, 4.6663, 4.4907, 4.7237, 4.9507, 5.1723, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.1121, 5.3199, 5.1671, 5.3708, 5.2223, 5.4222,\n 5.2778, 5.4740, 5.3333, 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997,\n 5.6830, 5.5549, 5.7354, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.8919,\n 5.7735, 5.9438, 5.8275, 5.9954, 5.8812, 6.0469, 5.9346, 6.0982, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.3008, 6.1968, 6.3509,\n 6.2483, 6.4006, 6.2994, 6.4501, 6.3502, 6.4993, 6.4008, 6.5483, 6.4510,\n 6.5970, 6.5008, 6.6454, 6.5504, 6.6935, 6.5997, 6.7414, 6.6486, 6.7890,\n 6.6973, 6.8364, 6.7456, 6.8834, 6.7937, 6.9303, 6.8414, 6.9768, 6.8889,\n 7.0231, 6.9361, 7.0692, 6.9830, 7.1149, 7.0296, 7.1605, 7.0759, 7.2058,\n 7.1220, 7.2508, 7.1678, 7.2956, 7.2134, 7.3402, 7.2587, 7.3845, 7.3037,\n 7.4286, 7.3485, 7.4724, 7.3930, 7.5161, 7.4373, 7.5595, 7.4813, 7.6026,\n 7.5251, 7.6456, 7.5687, 7.6883, 7.6120, 7.7308, 7.6551, 7.7732, 7.6980,\n 7.8153, 7.7407, 7.8571, 7.7831, 7.8988, 7.8253, 7.9403, 7.8673, 7.9816,\n 7.9091, 8.0227, 7.9507, 8.0636, 7.9921, 8.1043, 8.0333, 8.1448, 8.0742,\n 8.1851, 8.1150, 8.2252, 8.1556, 8.2652, 8.1960, 8.3050, 8.2362, 8.3446,\n 8.2762, 8.3840, 8.3161, 8.4232, 8.3557, 8.4623, 8.3952, 8.5012, 8.4345,\n 8.5399, 8.4736, 8.5785, 8.5126, 8.6169, 8.5513, 8.6551, 8.5899, 8.6932,\n 8.6284, 8.7311, 8.6667, 8.7689, 8.7048, 8.8065, 8.7427, 8.8439, 8.7805,\n 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: John was jogging through the park when he saw a man juggling watermelons. He was very impressive.\nWith pronoun replaced: John was very impressive.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -0.9045, -0.5941, -0.2928, -0.3849, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.6809, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.7570, -0.5922, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.8489, -0.7127,\n -0.5774, -0.4428, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.6598, -0.5347, -0.5744, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "79", + "Fraction of T in Greenlist": "39.7%", + "z-score": "4.79", + "p value": "8.4e-07", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.7823, 2.0494, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.7132, 1.6036, 1.4968, 1.3926, 1.6353, 1.8728, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.8766, 2.0702, 1.9870, 1.9052,\n 1.8245, 1.7450, 1.6667, 1.8543, 1.7765, 1.6997, 1.6239, 1.8074,\n 1.9887, 2.1678, 2.0913, 2.0158, 2.1918, 2.1167, 2.2902, 2.4618,\n 2.6316, 2.5560, 2.4814, 2.4077, 2.3349, 2.2629, 2.1917, 2.3570,\n 2.5207, 2.6828, 2.6112, 2.5403, 2.7001, 2.6296, 2.7875, 2.9439,\n 3.0989, 3.0282, 2.9582, 2.8889, 2.8203, 2.7524, 2.6852, 2.8368,\n 2.9872, 3.1363, 3.0688, 3.0019, 3.1492, 3.0827, 3.2285, 3.3731,\n 3.5166, 3.4499, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.4058, 3.5446, 3.4806, 3.4171, 3.5544, 3.4913, 3.6274, 3.7626,\n 3.8968, 3.8335, 3.7707, 3.7084, 3.6466, 3.5853, 3.5245, 3.6566,\n 3.7878, 3.9181, 3.8571, 3.7966, 3.9258, 3.8655, 3.9936, 4.1210,\n 4.2475, 4.1872, 4.1273, 4.0678, 4.0087, 3.9501, 3.8919, 4.0166,\n 4.1406, 4.2639, 4.2056, 4.1477, 4.2699, 4.2122, 4.3336, 4.4544,\n 4.5744, 4.5166, 4.4593, 4.4023, 4.3456, 4.2893, 4.2334, 4.3519,\n 4.2962, 4.4140, 4.3585, 4.4754, 4.5918, 4.7076, 4.6520, 4.7670,\n 4.8815, 4.9953, 4.9397, 5.0529, 5.1655, 5.2776, 5.2220, 5.1667,\n 5.1117, 5.0571, 5.0027, 4.9487, 4.8950, 4.8416, 4.7885])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I couldn't put the pot on the shelf because it was too tall.\nWith pronoun replaced: The pot was too tall.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -1.7889, -1.8481, -1.9064, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.9645, -2.0158, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -1.9262, -1.9757, -2.0247, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -2.1954, -2.2385, -2.0739, -2.1172, -2.1602, -1.9980, -1.8371,\n -1.8808, -1.9242, -1.9673, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.2188, -2.2578, -2.2966, -2.1520, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.4910, -2.3580, -2.3938, -2.4294, -2.2977, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.7852, 2.6681, 2.8943, 3.1160, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.8431, 3.7273, 3.9284, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.2796, 3.4743, 3.6662, 3.5642, 3.7528,\n 3.9386, 3.8376, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.2981, 5.4610, 5.3605, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.6210, 7.7555, 7.8889, 8.0212, 7.9259,\n 7.8318, 7.9630, 7.8699, 7.7778, 7.9079, 7.8168, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.3453, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.4532, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.9448, 9.0595, 9.1735, 9.0923, 9.0117, 9.1250, 9.0452,\n 9.1577, 9.2697, 9.3810, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.2753, 10.3805, 10.3020, 10.4067,\n 10.3289, 10.4330, 10.5366, 10.6397, 10.7423, 10.6650, 10.5884, 10.6904,\n 10.6144, 10.5388, 10.4638, 10.5654, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.0165, 10.9431, 11.0414, 10.9685, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.4047, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply not enough of them.\nWith pronoun replaced: There were simply not enough copies of the newsletter.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "50", + "Fraction of T in Greenlist": "25.1%", + "z-score": "0.0409", + "p value": "0.484", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.1275, 0.0634, 0.2520, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, 0.0000, 0.1459, 0.2907, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.3797, 0.3311, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.1826,\n 0.3185, 0.2722, 0.2261, 0.1803, 0.1348, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.3974, 0.3522, 0.3073, 0.2626, 0.3928, 0.3482,\n 0.3038, 0.2596, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.1240, 0.0000, 0.1234, 0.0821, 0.0409])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "77", + "# Tokens in Greenlist": "23", + "Fraction of T in Greenlist": "29.9%", + "z-score": "0.987", + "p value": "0.162", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, 0.1703, 0.4211, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.4730, 0.6712, 0.6000, 0.7947, 0.9869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: At the Loebner competition the judges couldn't figure out which respondents were the chatbots because they were so advanced.\nWith pronoun replaced: The judges were so advanced.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.5963, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 1.2309,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.1375, 1.3101, 1.2439, 1.4142,\n 1.5828, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.4931, 1.6514, 1.5892, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.6271, 1.7772, 1.9261, 1.8665, 1.8074, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.8091, 1.7522, 1.8953, 1.8385, 1.9803,\n 1.9237, 1.8676, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.6641, 1.7970, 1.7454, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.6230, 1.7529, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.8102, 1.7609, 1.8875, 2.0134, 2.1385, 2.0889,\n 2.0396, 1.9906, 1.9419, 1.8935, 1.8453, 1.9686, 2.0913, 2.2133,\n 2.1648, 2.1167, 2.0688, 2.0212, 2.1418, 2.0943, 2.2141, 2.3333,\n 2.4520, 2.4042, 2.3567, 2.4744, 2.4269, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "198", + "Fraction of T in Greenlist": "99.5%", + "z-score": "24.3", + "p value": "2.04e-130", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.7469, 5.0483, 5.3333, 5.6045, 5.8635, 6.1119, 6.3509,\n 6.5813, 6.8041, 7.0200, 7.2296, 7.4333, 7.6317, 7.8251, 8.0139,\n 8.1984, 8.3789, 8.5556, 8.7287, 8.8985, 9.0652, 9.2289, 9.3897,\n 9.5479, 9.7034, 9.8566, 10.0074, 10.1560, 10.3024, 10.4469, 10.5893,\n 10.7299, 10.8686, 11.0056, 11.1410, 11.2747, 11.4068, 11.5375, 11.6667,\n 11.7944, 11.9208, 12.0459, 12.1697, 12.2923, 12.4137, 12.5338, 12.6529,\n 12.7708, 12.8877, 13.0035, 13.1183, 13.2321, 13.3449, 13.4568, 13.5677,\n 13.6778, 13.7870, 13.8953, 14.0028, 14.1095, 14.2154, 14.3204, 14.4248,\n 14.5284, 14.6312, 14.7333, 14.8348, 14.9355, 15.0356, 15.1350, 15.2337,\n 15.3319, 15.4294, 15.5262, 15.6225, 15.7182, 15.8133, 15.9079, 16.0019,\n 16.0953, 16.1882, 16.2806, 16.3725, 16.4638, 16.5547, 16.6450, 16.7349,\n 16.8242, 16.9131, 17.0016, 17.0896, 17.1771, 17.2642, 17.3508, 17.4371,\n 17.5229, 17.6082, 17.6932, 17.7778, 17.8619, 17.9457, 18.0291, 18.1121,\n 18.1947, 18.2769, 18.3588, 18.4403, 18.5215, 18.6023, 18.6827, 18.7628,\n 18.8426, 18.9220, 19.0011, 19.0799, 19.1584, 19.2365, 19.3143, 19.3918,\n 19.4690, 19.5459, 19.6225, 19.6987, 19.7747, 19.8504, 19.9259, 20.0010,\n 20.0758, 20.1504, 20.2247, 20.2987, 20.3725, 20.4460, 20.5192, 20.5922,\n 20.6649, 20.7373, 20.8095, 20.8815, 20.9532, 21.0246, 21.0959, 21.1668,\n 21.2376, 21.3081, 21.3784, 21.4484, 21.5182, 21.5878, 21.6572, 21.7263,\n 21.7953, 21.8640, 21.9325, 22.0007, 22.0688, 22.1367, 22.2043, 22.2718,\n 22.3390, 22.4061, 22.4729, 22.5395, 22.6060, 22.6722, 22.7383, 22.8042,\n 22.8699, 22.9353, 23.0006, 23.0658, 23.1307, 23.1955, 23.2600, 23.3244,\n 23.3886, 23.4527, 23.5166, 23.5803, 23.6438, 23.7071, 23.7703, 23.8333,\n 23.8962, 23.9589, 24.0214, 24.0838, 24.1460, 24.2080, 24.2699])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I took the water bottle out of the backpack so that it would be lighter.\nWith pronoun replaced: I took the water bottle out of the backpack so that the backpack would be lighter.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.4444, 5.6307, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.5166, 6.6803, 6.5597, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.9714,\n 6.8641, 6.7583, 6.9076, 7.0553, 7.2016, 7.3464, 7.2421, 7.3855,\n 7.2827, 7.1813, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.1873, 8.3164, 8.4444, 8.5715, 8.4774, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 8.8228, 8.7327, 8.8544,\n 8.7652, 8.8860, 9.0060, 9.1252, 9.0370, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.6210, 9.5338, 9.6484, 9.7622, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.7312, 9.6471, 9.7590, 9.6757, 9.5931, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.3810, 9.4916, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.2310, 10.3341,\n 10.2591, 10.3617, 10.2872, 10.2132, 10.3154, 10.2419, 10.3435, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.5001, 10.4281, 10.3566, 10.4563, 10.3853,\n 10.4846, 10.4140, 10.5128, 10.4427, 10.3730, 10.4713, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The table won't fit through the doorway because it is too narrow.\nWith pronoun replaced: The table is too narrow.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.4027, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.5852,\n -1.6340, -1.6823, -1.5070, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.0476, -2.0881, -2.1284, -1.9799, -1.8324, -1.8732,\n -1.9137, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -2.0512, -2.0889,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.2618, -2.2977, -2.3333,\n -2.2026, -2.0726, -2.1086, -2.1444, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "5", + "# Tokens in Greenlist": "2", + "Fraction of T in Greenlist": "40.0%", + "z-score": "0.775", + "p value": "0.219", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I took the water bottle out of the backpack so that it would be handy.\nWith pronoun replaced: I took the water bottle out of the backpack so that the backpack would be handy.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.6255, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.5636, -0.3735, -0.4333, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.7493,\n -0.7935, -0.8374, -0.6956, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -0.8727, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.4958, 5.7155,\n 5.9297, 5.7429, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 6.2075,\n 6.4019, 6.5924, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 7.8174, 7.9704, 8.1216, 8.2711, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 9.9817, 9.8792, 10.0000, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.2923, 10.4083, 10.5236, 10.6380, 10.5409,\n 10.6547, 10.7678, 10.8801, 10.7843, 10.6894, 10.8012, 10.9123, 11.0227,\n 10.9291, 11.0389, 11.1480, 11.2564, 11.1640, 11.0724, 11.1803, 11.2877,\n 11.3944, 11.3039, 11.4101, 11.5156, 11.6206, 11.5311, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.6667, 11.7696, 11.8719, 11.9737, 11.8870, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.2178, 12.3167, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.5685, 12.4872, 12.5831, 12.6785, 12.7735, 12.6930, 12.7876, 12.8817,\n 12.9755, 12.8957, 12.8165, 12.9099, 13.0030, 13.0956, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The firemen arrived after the police because they were coming from so far away.\nWith pronoun replaced: The police were coming from so far away.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.7150, -1.7635, -1.8116,\n -1.6340, -1.4580, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.1057, -2.1470, -2.1880, -2.0349, -1.8829, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.3735,\n -2.4116, -2.4495, -2.4872, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.4371,\n -2.4736, -2.5099, -2.3734, -2.2377, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "93", + "Fraction of T in Greenlist": "46.7%", + "z-score": "7.08", + "p value": "7.19e-13", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.3093, 1.6082, 1.4757, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.0211, 2.2418, 2.1412, 2.0428, 1.9462, 2.1602,\n 2.3706, 2.2743, 2.4804, 2.3851, 2.5873, 2.4930, 2.6914, 2.5981,\n 2.7928, 2.7005, 2.8919, 2.8006, 2.9887, 2.8983, 3.0833, 2.9938,\n 3.1760, 3.0873, 3.2667, 3.1789, 3.3556, 3.2686, 3.4427, 3.3566,\n 3.5283, 3.4429, 3.6122, 3.5277, 3.6947, 3.6109, 3.7758, 3.6927,\n 3.8555, 3.7732, 3.9340, 3.8523, 4.0112, 3.9302, 4.0872, 4.0069,\n 4.1621, 4.0825, 4.2359, 4.1569, 4.3086, 4.2303, 4.3804, 4.3027,\n 4.4511, 4.3740, 4.5210, 4.4444, 4.5899, 4.5140, 4.6580, 4.5826,\n 4.7252, 4.6503, 4.7916, 4.7173, 4.8572, 4.7834, 4.9221, 4.8488,\n 4.9862, 4.9135, 5.0496, 4.9774, 5.1123, 5.0406, 5.1744, 5.1031,\n 5.2358, 5.1650, 5.2965, 5.2262, 5.3567, 5.2868, 5.4163, 5.3468,\n 5.4752, 5.4062, 5.5336, 5.4650, 5.5915, 5.5233, 5.6488, 5.5811,\n 5.7056, 5.6383, 5.7619, 5.6949, 5.8177, 5.7511, 5.8730, 5.8068,\n 5.9279, 5.8621, 5.9822, 5.9168, 6.0362, 5.9711, 6.0897, 6.0249,\n 6.1427, 6.0784, 6.1954, 6.1314, 6.2476, 6.1839, 6.2994, 6.2361,\n 6.3509, 6.2879, 6.4019, 6.3392, 6.4526, 6.3902, 6.5029, 6.4409,\n 6.5528, 6.4911, 6.6024, 6.5410, 6.6517, 6.5906, 6.7006, 6.6398,\n 6.7492, 6.6887, 6.7974, 6.7372, 6.8454, 6.7854, 6.8930, 6.8333,\n 6.9403, 6.8809, 6.9873, 6.9282, 7.0340, 6.9752, 7.0804])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.\nWith pronoun replaced: Dad could not help him now.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.9635, -2.0000,\n -2.0364, -1.9068, -1.9432, -1.9795, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "49.0%", + "z-score": "7.8", + "p value": "3.2e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 4.0024, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.6476,\n 4.5547, 4.4630, 4.6262, 4.7875, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.5899, 4.7469, 4.9023, 5.0562, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.3345, 5.2489, 5.1643, 5.0807, 4.9980, 5.1450, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.1908, 5.3333, 5.4747, 5.6149, 5.5348, 5.6737,\n 5.8114, 5.9481, 5.8684, 5.7894, 5.7112, 5.6338, 5.5572, 5.6921,\n 5.8260, 5.9589, 6.0908, 6.0143, 6.1451, 6.0693, 6.1990, 6.1237,\n 6.2524, 6.3803, 6.3054, 6.4322, 6.3580, 6.2843, 6.4101, 6.3369,\n 6.2644, 6.3892, 6.3172, 6.2458, 6.1750, 6.1047, 6.0351, 5.9660,\n 5.8974, 5.8294, 5.9524, 5.8848, 6.0069, 6.1283, 6.0609, 6.1815,\n 6.3013, 6.4203, 6.3532, 6.4715, 6.5891, 6.7060, 6.6391, 6.7552,\n 6.8707, 6.9856, 6.9189, 6.8527, 6.7869, 6.7217, 6.6568, 6.7706,\n 6.8838, 6.9964, 6.9317, 6.8675, 6.8037, 6.9155, 7.0266, 7.1372,\n 7.0736, 7.1835, 7.2929, 7.4017, 7.3383, 7.4465, 7.5542, 7.6613,\n 7.5981, 7.5353, 7.4729, 7.4109, 7.3493, 7.4556, 7.5614, 7.6667,\n 7.7715, 7.7099, 7.8142, 7.7530, 7.8567, 7.7958])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I poured water from the bottle into the cup until it was empty.\nWith pronoun replaced: The cup was empty.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "188", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.337", + "p value": "0.368", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.4974, -0.5410, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.2221, -0.0886, -0.1325, 0.0000, -0.0439, 0.0875, 0.0436, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, 0.1287, 0.2568, 0.2134, 0.3405,\n 0.2971, 0.4233, 0.3800, 0.3369])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.0825, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 4.8488, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.4051, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.6883, 7.5615, 7.7150, 7.8667, 8.0167, 7.8928,\n 7.7710, 7.9196, 8.0667, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.4037, 8.5435, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.5191, 9.4088, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.6995, 9.5939, 9.7183, 9.6141, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.5067, 10.4083, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.3695, 10.2763, 10.1840, 10.2975, 10.2062,\n 10.3191, 10.4312, 10.3409, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.4935, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.7672, 11.8673,\n 11.9669, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.1967, 12.1158,\n 12.2132, 12.1329, 12.2298, 12.3263, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.5495, 12.4713, 12.5657, 12.4880, 12.4109, 12.3342, 12.2581,\n 12.3523, 12.2767, 12.3705, 12.4638, 12.3888, 12.3143, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.2794, 12.3718, 12.4638, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I put the cake away in the refrigerator. It has a lot of butter in it.\nWith pronoun replaced: The cake has a lot of butter in it.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, 0.0727, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.5298, 0.4606, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.3871, 0.5505, 0.4932, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.5871, 0.5315, 0.4763, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.8296, 0.7746, 0.9258, 0.8709, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.7213, 0.6742, 0.8066, 0.7595, 0.8909,\n 0.8438, 0.9742, 0.9272, 1.0565, 1.0096, 1.1380, 1.0911, 1.0445,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.9415, 0.8963, 1.0215,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.8718, 7.0456, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.8174, 7.9704, 8.1216, 7.9931, 7.8667, 8.0167, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.6603, 8.5435, 8.6820, 8.8192, 8.7045, 8.5915, 8.7277, 8.8626,\n 8.9963, 9.1287, 9.2600, 9.3901, 9.2796, 9.4088, 9.5368, 9.6638,\n 9.5552, 9.6813, 9.8064, 9.9304, 9.8237, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.3257, 10.4444, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.5909, 10.7066, 10.8215, 10.9355, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.0746, 10.9769, 11.0883, 11.1991, 11.3091, 11.4184, 11.5271, 11.6351,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.7647, 11.8704, 11.9754, 12.0798,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.2040, 12.3063, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.6287, 12.5394, 12.6387, 12.7376,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.2410, 13.3361, 13.4308, 13.5250, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.6343, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.5726, 14.4923, 14.5797, 14.6667,\n 14.7533, 14.8396, 14.9255, 15.0111, 14.9318, 15.0172, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Lionel is holding captive a scientist, Dr. Vardi, who has invented a device that turns animals invisible; Lionel plans to use it on Geoffrey and send him to steal nuclear material from an army vault.\nWith pronoun replaced: Lionel plans to use it on Geoffrey and send Lionel to steal nuclear material from an army vault.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, 0.0000, 0.2379, 0.1571, 0.3892, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.3333, 0.5298, 0.7237, 0.9152, 0.8444, 1.0328,\n 1.2189, 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.2710, 1.4446, 1.6164, 1.5483, 1.7178, 1.8856,\n 2.0517, 1.9829, 1.9149, 1.8475, 2.0107, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 1.8716, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.7767, 1.9298, 2.0817, 2.2323, 2.1700, 2.3190,\n 2.4669, 2.6135, 2.5508, 2.4887, 2.4271, 2.5717, 2.7153, 2.6536,\n 2.5925, 2.5318, 2.4717, 2.6131, 2.7534, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.4453, 2.5802, 2.7143, 2.6576, 2.7906, 2.9227, 3.0540, 2.9971,\n 2.9406, 2.8845, 3.0143, 3.1433, 3.0872, 3.0315, 3.1593, 3.1038,\n 3.0486, 2.9938, 2.9394, 2.8853, 3.0114, 2.9575, 2.9040, 2.8508,\n 2.7979, 2.9225, 3.0464, 3.1696, 3.1166, 3.2389, 3.3606, 3.4816,\n 3.4283, 3.3754, 3.3228, 3.4427, 3.5619, 3.5093, 3.4570, 3.4050,\n 3.3534, 3.3020, 3.2509, 3.2002, 3.1497, 3.0995, 3.0496, 3.0000,\n 2.9507, 2.9016, 2.8528, 2.8043, 2.7560, 2.7080, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.3169, 0.5040, 0.4384, 0.3735, 0.3095, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.6083, 0.5505, 0.7124, 0.8729,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.7441, 0.6880, 0.8433,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.9812, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.9115, 0.8575, 0.8040, 0.7509, 0.8978, 1.0435, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 1.0139, 1.1547,\n 1.1028, 1.0512, 1.0000, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 1.0735, 1.0235, 0.9739, 0.9245, 1.0598, 1.1942, 1.3278, 1.2780,\n 1.2285, 1.1794, 1.1305, 1.0820, 1.0338, 1.1651, 1.2956, 1.2472,\n 1.1991, 1.1513, 1.2804, 1.2326, 1.1852, 1.1380, 1.0911, 1.2185,\n 1.1717, 1.1251, 1.0788, 1.2049, 1.3303, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.2700, 1.2244, 1.1790, 1.3019, 1.4241, 1.3786, 1.3333,\n 1.2883, 1.4093, 1.3644, 1.3197, 1.2752, 1.2309, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The foxes are getting in at night and attacking the chickens. They have gotten very bold.\nWith pronoun replaced: The foxes have gotten very bold.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -0.8592, -0.9017, -0.9439, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -1.0445,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -0.9415, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 0.9608, 0.8724, 1.0999, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456, 0.8660,\n 1.0742, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.3641, 1.5492,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.8677, 1.7951, 1.7233,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.8173, 1.7496, 1.6828, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.3590, 1.5097, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.7264, 1.8676, 2.0078, 1.9518, 2.0907, 2.0349, 1.9795, 2.1170,\n 2.0617, 2.1980, 2.1429, 2.0881, 2.0338, 1.9799, 2.1143, 2.0605,\n 2.0071, 2.1401, 2.2723, 2.2188, 2.1656, 2.2966, 2.2436, 2.1909,\n 2.1386, 2.0866, 2.0350, 1.9837, 1.9327, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 2.1634, 2.1131, 2.0631, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.9906, 1.9419, 1.8935, 2.0170, 2.1398, 2.0913, 2.2133,\n 2.1648, 2.1167, 2.2377, 2.1896, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 2.0726, 2.1913, 2.3094, 2.4269, 2.3798, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam pulled up a chair to the piano, but it was broken, so he had to stand instead.\nWith pronoun replaced: The piano was broken, so he had to stand instead.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.2226, -2.0137, -2.0656,\n -2.1170, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -1.9262, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.6988, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.4056, -1.2443, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.1921, -1.2326, -1.2730, -1.3131, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.3859, -1.2566, -1.2950, -1.3333,\n -1.2052, -1.2435, -1.2817, -1.3197, -1.3574, -1.3950, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "41.5%", + "z-score": "3.08", + "p value": "0.00104", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823, 2.8868,\n 3.0793])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jane knocked on Susan's door but she did not get an answer.\nWith pronoun replaced: Susan did not get an answer.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 2.0466, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.6330,\n 1.9096, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.5775, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.1546, 2.0702, 1.9870, 1.9052,\n 2.0948, 2.0135, 1.9333, 1.8543, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.8378, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.6524, 1.5823, 1.5131, 1.4446, 1.3770, 1.3101, 1.2439, 1.4142,\n 1.3483, 1.2831, 1.4506, 1.6166, 1.5511, 1.4863, 1.6498, 1.5852,\n 1.7467, 1.9066, 1.8419, 1.7778, 1.7143, 1.6514, 1.8084, 1.7457,\n 1.6837, 1.6222, 1.5613, 1.5010, 1.4412, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.5681, 1.5097, 1.6591, 1.6008, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.4963, 1.6398, 1.5842,\n 1.5291, 1.6710, 1.6160, 1.7566, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.6781, 1.8157, 1.7619, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.6337, 1.7679, 1.7158, 1.6641, 1.6127, 1.5617, 1.6941, 1.6432,\n 1.5926, 1.5423, 1.4923, 1.4427, 1.3933, 1.5236, 1.4743, 1.4254,\n 1.5544, 1.6827, 1.6336, 1.5848, 1.7119, 1.6632, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.4093, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "135", + "Fraction of T in Greenlist": "67.8%", + "z-score": "14", + "p value": "1.44e-44", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.6664, 4.8742, 5.0779, 4.9373, 5.1371, 5.0000,\n 5.1962, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854, 5.3716, 5.5549,\n 5.7354, 5.9132, 5.7877, 5.9628, 5.8398, 5.7192, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.0812, 6.9824, 6.8849, 7.0268, 6.9305, 6.8354,\n 6.7414, 6.8819, 6.7890, 6.6973, 6.8364, 6.9743, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.2001, 7.1111, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.5494, 7.6785, 7.8065, 7.9336, 8.0598, 7.9724, 8.0976, 8.0111,\n 8.1354, 8.0497, 8.1731, 8.0882, 8.2107, 8.3324, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.9285, 8.8443, 8.9612, 8.8778, 8.7952, 8.9113,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.2055, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.7619, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.2220, 10.1423, 10.2486, 10.3544, 10.4596, 10.5642, 10.6683, 10.7719,\n 10.8749, 10.9773, 11.0793, 11.1807, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.5000,\n 13.5897, 13.6789, 13.7679, 13.8564, 13.9446, 13.8683, 13.9562])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bob was playing cards with Adam and was way ahead. If Adam hadn't had a sudden run of good luck, he would have won.\nWith pronoun replaced: Adam would have won.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.0785, -1.8958, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.6690, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -2.1470, -1.9935, -2.0349, -2.0761, -2.1170,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.4116, -2.4495, -2.4872, -2.5247, -2.5620, -2.5990, -2.6359, -2.6726,\n -2.7091, -2.7454, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.7852,\n -2.6472, -2.5099, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.7297, -2.5969, -2.4648, -2.3333,\n -2.3688, -2.2384, -2.1086, -1.9795, -2.0156, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "196", + "# Tokens in Greenlist": "84", + "Fraction of T in Greenlist": "42.9%", + "z-score": "5.77", + "p value": "3.88e-09", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.7698, 1.0441, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.8240, 1.7285, 1.6348, 1.5430,\n 1.4530, 1.3646, 1.5785, 1.4907, 1.7002, 1.6131, 1.8185, 2.0207,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.9052,\n 1.8245, 1.7450, 1.9333, 2.1193, 2.0397, 2.2226, 2.4034, 2.5820,\n 2.7585, 2.6778, 2.5983, 2.5198, 2.6928, 2.8638, 2.7854, 2.7080,\n 2.8764, 3.0429, 3.2077, 3.1300, 3.0533, 3.2157, 3.3764, 3.2998,\n 3.2242, 3.1493, 3.0754, 3.2332, 3.1597, 3.3156, 3.2426, 3.1704,\n 3.3243, 3.4768, 3.4047, 3.3333, 3.2627, 3.1928, 3.1236, 3.2733,\n 3.2044, 3.3526, 3.2841, 3.4308, 3.3627, 3.5079, 3.4402, 3.5839,\n 3.5166, 3.4499, 3.3838, 3.3182, 3.4599, 3.3947, 3.3301, 3.4701,\n 3.6091, 3.5446, 3.6824, 3.8191, 3.9549, 4.0898, 4.0249, 3.9606,\n 3.8968, 4.0301, 4.1625, 4.0988, 4.0356, 4.1667, 4.2970, 4.4264,\n 4.3631, 4.3004, 4.4286, 4.5560, 4.4933, 4.4312, 4.3695, 4.3083,\n 4.4342, 4.3733, 4.4983, 4.4376, 4.3774, 4.5013, 4.6245, 4.7469,\n 4.6867, 4.6268, 4.7483, 4.6887, 4.6295, 4.5707, 4.5123, 4.6325,\n 4.7520, 4.6938, 4.8125, 4.9305, 5.0479, 4.9896, 4.9317, 5.0483,\n 4.9906, 4.9333, 5.0489, 5.1640, 5.1068, 5.2211, 5.3349, 5.4480,\n 5.5606, 5.5033, 5.4464, 5.3898, 5.5015, 5.6126, 5.5562, 5.5000,\n 5.6104, 5.7203, 5.8296, 5.7735])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I put the butterfly wing on the table and it broke.\nWith pronoun replaced: The table broke.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "23.3%", + "z-score": "-0.504", + "p value": "0.693", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.4436, -0.5040, -0.3131, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.2334, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.7517, 7.6140, 7.7723, 7.6376, 7.5056,\n 7.3760, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.3333, 8.4770, 8.3560, 8.4984, 8.3795, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.3422, 9.4685, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.5331, 9.4327, 9.3333, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.6251, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.5955, 10.5025, 10.6145,\n 10.5224, 10.6338, 10.5427, 10.6534, 10.7635, 10.8729, 10.9816, 11.0897,\n 11.1971, 11.1073, 11.2142, 11.3204, 11.4261, 11.5311, 11.6356, 11.7395,\n 11.8427, 11.9455, 11.8571, 11.7696, 11.8719, 11.7851, 11.8870, 11.8010,\n 11.9024, 12.0032, 11.9181, 12.0185, 11.9341, 12.0341, 11.9504, 11.8673,\n 11.7849, 11.8846, 11.8028, 11.7217, 11.8210, 11.7405, 11.8393, 11.7595,\n 11.8579, 11.7787, 11.7000, 11.6220, 11.7200, 11.6425, 11.7401, 11.6632,\n 11.5868, 11.5109, 11.6082, 11.5329, 11.4581, 11.5549, 11.4806, 11.5771,\n 11.5033, 11.4300, 11.3572, 11.4533, 11.3809, 11.4766, 11.4047, 11.3333,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.4765, 11.5706, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Paul tried to call George on the phone, but he wasn't successful.\nWith pronoun replaced: Paul wasn't successful.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.5627, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.7791, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.2998, 3.1918, 3.0861,\n 3.2883, 3.1840, 3.0817, 2.9814, 2.8830, 2.7863, 2.9823, 3.1754,\n 3.3657, 3.5533, 3.7383, 3.6407, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.4463, 3.3558, 3.2667, 3.1789, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.0151, 2.9329, 3.1052, 3.0237, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.4286, 2.5927,\n 2.5207, 2.4495, 2.6112, 2.7713, 2.7001, 2.6296, 2.7875, 2.7175,\n 2.6481, 2.8039, 2.7349, 2.6667, 2.8203, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.6381, 2.5731, 2.5087, 2.4449, 2.5934, 2.7406,\n 2.6768, 2.6135, 2.5508, 2.4887, 2.6336, 2.7775, 2.7153, 2.6536,\n 2.5925, 2.7344, 2.6735, 2.6131, 2.5532, 2.4938, 2.4348, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.5373, 2.6742, 2.6163, 2.5589, 2.5019,\n 2.4453, 2.5802, 2.7143, 2.6576, 2.6014, 2.5456, 2.6781, 2.6224,\n 2.5672, 2.5123, 2.4578, 2.4037, 2.5343, 2.4803, 2.4267, 2.3735,\n 2.5026, 2.6309, 2.5776, 2.5247, 2.4721, 2.4198, 2.5466, 2.6726,\n 2.6203, 2.5683, 2.5166, 2.6414, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.5099, 2.4597, 2.4099, 2.3603, 2.4822, 2.6034, 2.5538,\n 2.5044, 2.4553, 2.4065, 2.5265, 2.6458, 2.5969, 2.5483, 2.5000,\n 2.6182, 2.5700, 2.5220, 2.4744, 2.4269, 2.3798, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "77", + "Fraction of T in Greenlist": "38.7%", + "z-score": "4.46", + "p value": "4.08e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 3.6742,\n 3.5176, 3.3665, 3.2205, 3.0792, 3.3221, 3.1844, 3.0509, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.0123, 2.8943, 2.7791, 2.6667,\n 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.6713, 2.5690, 2.7775,\n 2.6765, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.1320, 2.0455, 2.2404, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.5504, 2.7333, 2.6491, 2.8292, 3.0071, 2.9231, 2.8402,\n 2.7585, 2.9329, 2.8518, 2.7717, 2.6928, 2.6148, 2.5378, 2.7080,\n 2.8764, 3.0429, 3.2077, 3.3708, 3.2928, 3.4538, 3.6133, 3.7712,\n 3.6931, 3.6159, 3.5396, 3.4641, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.3243, 3.2525, 3.1814, 3.1111, 3.0415, 2.9726, 3.1236, 3.2733,\n 3.2044, 3.1363, 3.0688, 3.0019, 2.9357, 2.8701, 2.8051, 2.9515,\n 3.0967, 3.2408, 3.1755, 3.3182, 3.2533, 3.3947, 3.3301, 3.2660,\n 3.2025, 3.1395, 3.0770, 3.0151, 2.9537, 2.8928, 2.8324, 2.7724,\n 2.9103, 3.0471, 3.1831, 3.1229, 3.0632, 3.1977, 3.3314, 3.4641,\n 3.4042, 3.5359, 3.6667, 3.6068, 3.5474, 3.4884, 3.6178, 3.5590,\n 3.5007, 3.4428, 3.3853, 3.3282, 3.4558, 3.5827, 3.7087, 3.8341,\n 3.9586, 4.0825, 4.2056, 4.3280, 4.2699, 4.3915, 4.3336, 4.4544,\n 4.5744, 4.5166, 4.4593, 4.5783, 4.5212, 4.6395, 4.7572, 4.8742,\n 4.9906, 5.1064, 5.0489, 4.9918, 4.9351, 4.8787, 4.8227, 4.7670,\n 4.7117, 4.8260, 4.7709, 4.7161, 4.6616, 4.6074, 4.5535, 4.6667,\n 4.7792, 4.7255, 4.6720, 4.6188, 4.5659, 4.5134, 4.4611])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Look! There is a shark swimming right below that duck! It had better get away to safety fast!\nWith pronoun replaced: The shark had better get away to safety fast!\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -1.7408, -1.8074, -1.5323, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -0.8923, -0.9409, -0.7809, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.5238, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.4428, -0.4857, -0.3522, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 2.0000, 2.4019, 2.7775, 2.5342, 2.8868,\n 3.2206, 2.9938, 2.7815, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.4772,\n 5.3199, 5.1671, 5.0186, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.4738, 6.3509,\n 6.5166, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.2667, 7.4174, 7.5664, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.3152, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.3984, 8.2916, 8.1862, 8.0822, 8.2178, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.7039, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.2351, 9.3582, 9.4803, 9.3834,\n 9.5047, 9.6251, 9.7447, 9.6490, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.0472, 10.1621, 10.2763, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.7635, 10.8729, 10.9816, 11.0897,\n 10.9998, 10.9107, 10.8224, 10.7349, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 10.9898, 11.0952, 11.2001, 11.3043, 11.2194, 11.1352, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.9837, 11.9020, 11.8210, 11.9197, 12.0180, 12.1158,\n 12.2132, 12.3100, 12.2298, 12.3263, 12.4223, 12.5179, 12.4384, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.6597, 12.7532, 12.6757, 12.5986,\n 12.5221, 12.4460, 12.3705, 12.4638, 12.5568, 12.6494, 12.5745, 12.5000,\n 12.5923, 12.5183, 12.4448, 12.5367, 12.6283, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The trophy doesn't fit into the brown suitcase because it is too large.\nWith pronoun replaced: The suitcase is too large.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.1690, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.0486, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n 0.1332, 0.0886, 0.0442, 0.0000, -0.0439, 0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.0420, 0.0838, 0.0418, 0.0000,\n -0.0416, 0.0829, 0.0413, 0.1650, 0.1234, 0.0821, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.6590, 1.9245, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.5533, 2.7852, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.8431, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.5118, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.0844, 4.9747, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.7155,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 6.9378, 7.0812, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 7.8889, 8.0212, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.6667, 8.5715, 8.6976, 8.8227, 8.9469,\n 8.8529, 8.9763, 9.0987, 9.2202, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.0926, 10.0021,\n 9.9124, 9.8236, 9.9373, 9.8494, 9.9625, 9.8753, 9.7890, 9.7034,\n 9.8159, 9.7312, 9.8430, 9.7590, 9.6757, 9.5931, 9.5112, 9.6225,\n 9.5413, 9.4608, 9.3810, 9.3017, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.7224, 9.8293, 9.9357, 10.0416,\n 9.9648, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.2310, 10.1559,\n 10.2591, 10.3617, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 11.0904, 11.1886, 11.1148, 11.2126, 11.3099, 11.2366,\n 11.3335, 11.4300, 11.5261, 11.6217, 11.7169, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.2794, 12.2068, 12.2992, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Joe's uncle can still beat him at tennis, even though he is 30 years older.\nWith pronoun replaced: Joe is 30 years older.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.0328,\n 0.9623, 1.1476, 1.3308, 1.2599, 1.1898, 1.1206, 1.0523, 1.2309,\n 1.1628, 1.0954, 1.0289, 0.9631, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 0.8003, 0.9671, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.3460, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.3318, 1.2778, 1.4201, 1.3663, 1.5073, 1.4535, 1.4001, 1.3472,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.5303, 1.4792, 1.4284, 1.3779, 1.5110, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.2623, 1.2136, 1.3443, 1.2956, 1.2472,\n 1.3768, 1.5055, 1.4570, 1.5848, 1.5363, 1.6632, 1.6148, 1.5667,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.6737, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.7310, 1.6843, 1.6378, 1.5916, 1.5457, 1.6667,\n 1.6208, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.1623, 3.4219, 3.6742, 3.9196, 3.7626, 3.6108, 3.4641,\n 3.7017, 3.9337, 3.7905, 3.6515, 3.5165, 3.7417, 3.9620, 3.8297, 3.7009,\n 3.9158, 4.1265, 4.0000, 3.8765, 4.0825, 4.2848, 4.1633, 4.0446, 3.9284,\n 4.1260, 4.3205, 4.2060, 4.0937, 3.9837, 4.1740, 4.3614, 4.2528, 4.1461,\n 4.3301, 4.5115, 4.4061, 4.5847, 4.7610, 4.9348, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 4.7683, 4.6715, 4.8375, 5.0017, 4.9058, 4.8113,\n 4.9731, 5.1332, 5.0395, 5.1977, 5.3541, 5.5090, 5.4160, 5.3243, 5.2338,\n 5.3865, 5.5377, 5.4480, 5.3594, 5.2719, 5.4212, 5.5690, 5.4822, 5.3964,\n 5.5426, 5.6874, 5.6023, 5.7457, 5.8878, 6.0288, 5.9442, 5.8605, 5.7778,\n 5.9171, 6.0553, 5.9732, 5.8919, 5.8114, 5.9481, 6.0837, 6.0038, 5.9247,\n 6.0590, 6.1923, 6.1137, 6.2459, 6.3770, 6.5072, 6.4291, 6.3517, 6.2750,\n 6.4039, 6.5320, 6.4558, 6.3803, 6.3054, 6.4322, 6.5582, 6.4838, 6.4101,\n 6.5350, 6.6591, 6.5857, 6.7089, 6.8313, 6.9529, 6.8799, 6.8075, 6.7358,\n 6.8563, 6.9762, 6.9048, 6.8339, 6.7637, 6.8825, 7.0006, 6.9307, 6.8614,\n 6.9786, 7.0952, 7.0262, 7.1420, 7.2572, 7.3717, 7.3030, 7.4168, 7.5299,\n 7.6424, 7.5740, 7.5061, 7.4386, 7.5504, 7.6615, 7.5944, 7.5277, 7.4615,\n 7.5719, 7.6816, 7.6158, 7.5503, 7.6594, 7.7679, 7.7028, 7.8107, 7.9181,\n 8.0249, 7.9601, 7.8956, 7.8316, 7.9377, 8.0433, 7.9796, 7.9162, 7.8533,\n 7.9582, 8.0627, 8.0000, 7.9377, 8.0416, 8.1449, 8.0829, 8.1858, 8.2882,\n 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The large ball crashed right through the table because it was made of steel.\nWith pronoun replaced: The large ball was made of steel.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "19.6%", + "z-score": "-1.76", + "p value": "0.961", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.5345, 0.4402, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.4444, -0.4977, -0.3303, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.9258, -0.9734, -1.0206,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.6057, -1.6444, -1.6830, -1.7213, -1.7595, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.6466, -1.5159, -1.5539, -1.5916, -1.6292, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.7233, -1.7599])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 7.9472, 8.0928, 7.9754, 7.8598, 7.7460,\n 7.6339, 7.5234, 7.6681, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.3521, 8.4853,\n 8.6173, 8.5149, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.0139, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.0057, 9.1273, 9.0354, 8.9444, 8.8544,\n 8.9752, 8.8860, 9.0060, 9.1252, 9.2435, 9.1553, 9.2729, 9.3897,\n 9.5057, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 9.8159, 9.7312, 9.8430, 9.9542, 9.8702, 9.7869, 9.8975, 9.8150,\n 9.9249, 10.0342, 9.9524, 10.0611, 9.9800, 9.8995, 9.8197, 9.7405,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.7224, 9.6456, 9.5695, 9.6764,\n 9.7828, 9.8887, 9.8131, 9.9184, 9.8433, 9.9481, 10.0523, 10.1559,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.4909, 10.5921, 10.5181, 10.6187,\n 10.5453, 10.4724, 10.3999, 10.3280, 10.2565, 10.3566, 10.2856, 10.2151,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.3038, 10.4021, 10.3333,\n 10.4312, 10.5286, 10.6256, 10.7222, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam broke both his ankles and he's walking with crutches. But a month or so from now they should be better.\nWith pronoun replaced: The crutches should be better.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "92", + "Fraction of T in Greenlist": "46.2%", + "z-score": "6.92", + "p value": "2.31e-12", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868, 2.6605, 2.4495,\n 2.2517, 2.0656, 2.3938, 2.2156, 2.5281, 2.8284, 2.6558, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.5627, 2.8098, 2.6811, 2.9212, 3.1558, 3.0290, 3.2577, 3.4816, 3.3566,\n 3.5753, 3.4528, 3.3333, 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 3.9284,\n 4.1260, 4.0119, 3.9001, 3.7905, 3.9837, 4.1740, 4.0657, 4.2528, 4.1461,\n 4.0415, 4.2251, 4.1219, 4.0205, 3.9208, 4.1008, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.6715, 4.8375, 4.7419, 4.6476, 4.8113,\n 4.7181, 4.6262, 4.5356, 4.6967, 4.8561, 4.7662, 4.9237, 5.0795, 4.9904,\n 5.1444, 5.2970, 5.2086, 5.1212, 5.0350, 4.9497, 5.1000, 5.0156, 4.9322,\n 4.8497, 4.9980, 5.1450, 5.0630, 5.2085, 5.3526, 5.2713, 5.4140, 5.5556,\n 5.4747, 5.3947, 5.3156, 5.2372, 5.3769, 5.2992, 5.2223, 5.1461, 5.2842,\n 5.4212, 5.3455, 5.4813, 5.6160, 5.5407, 5.6743, 5.8069, 5.7320, 5.8635,\n 5.7892, 5.7155, 5.8458, 5.7726, 5.7001, 5.6282, 5.7572, 5.8853, 5.8138,\n 5.9409, 6.0671, 5.9960, 6.1213, 6.2458, 6.1750, 6.1047, 6.0351, 5.9660,\n 6.0892, 6.0205, 5.9524, 5.8848, 6.0069, 6.1283, 6.0609, 6.1815, 6.3013,\n 6.2342, 6.3532, 6.4715, 6.4048, 6.5223, 6.4559, 6.3901, 6.5067, 6.4413,\n 6.3762, 6.3117, 6.4274, 6.5424, 6.4781, 6.5924, 6.7061, 6.6421, 6.7551,\n 6.8675, 6.8037, 6.7404, 6.6775, 6.6150, 6.7264, 6.6642, 6.6024, 6.5410,\n 6.6517, 6.7618, 6.7006, 6.8101, 6.9190, 6.8580, 6.9663, 7.0741, 7.0133,\n 6.9530, 6.8930, 6.8333, 6.9403, 6.8809, 6.8219, 6.7632, 6.8695, 6.9752,\n 6.9167])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "50.7%", + "z-score": "6.88", + "p value": "2.93e-12", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.0012, 3.8490, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.4816, 3.3566, 3.2348, 3.1160, 3.0000,\n 2.8868, 2.7761, 2.9913, 2.8823, 2.7757, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.4873, 3.3824, 3.5777, 3.7700, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.6568, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 4.8667, 4.7683, 4.6715, 4.5760, 4.4820, 4.6476,\n 4.5547, 4.4630, 4.3727, 4.5356, 4.4462, 4.3580, 4.5186, 4.4313,\n 4.5899, 4.7469, 4.9023, 5.0562, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.3345, 5.4822, 5.6285, 5.7735, 5.6874, 5.8310, 5.9732, 6.1143,\n 6.2541, 6.1685, 6.3070, 6.2222, 6.3595, 6.4957, 6.6308, 6.5465,\n 6.4632, 6.3807, 6.2991, 6.2183, 6.1382, 6.2716, 6.1923, 6.1137,\n 6.0359, 6.1680, 6.0908, 6.0143, 6.1451, 6.0693, 6.1990, 6.3278,\n 6.4558, 6.5828, 6.5072, 6.6332, 6.7585, 6.8828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Grant worked hard to harvest his beans so he and his family would have enough to eat that winter, His friend Henry let him stack them in his barn where they would dry. Later, he and Tatyana would shell them and cook them for their Sunday dinners.\nWith pronoun replaced: His friend Henry let him stack Grant and his family in his barn where they would dry.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, 0.0000, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.2449, 0.1952, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.0476, 0.0000, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.6025, 0.5108, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 1.0742, 1.2792, 1.4812, 1.6803, 1.8766, 2.0702, 2.2611, 2.4495,\n 2.6354, 2.8189, 3.0000, 3.1789, 3.0924, 3.2686, 3.1829, 3.3566,\n 3.2717, 3.4429, 3.6122, 3.7796, 3.9452, 4.1090, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.4182, 4.3339, 4.2507, 4.1684, 4.0872, 4.2426,\n 4.3966, 4.5491, 4.7001, 4.8497, 4.9980, 5.1450, 5.2906, 5.4349,\n 5.5780, 5.7199, 5.8605, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.2991, 6.4327, 6.5653, 6.6968, 6.6157, 6.7462,\n 6.6658, 6.7952, 6.9237, 6.8439, 6.7648, 6.8922, 6.8138, 6.7361,\n 6.8624, 6.7854, 6.9107, 6.8343, 6.9587, 7.0823, 7.2051, 7.3271,\n 7.2510, 7.1755, 7.2966, 7.2217, 7.1474, 7.2675, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.8988, 8.0139, 8.1282, 8.2420,\n 8.3550, 8.4674, 8.5792, 8.6903, 8.8008, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.8000, 8.9086, 9.0167, 9.1242, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.6130, 9.5416, 9.6452, 9.5743,\n 9.5038, 9.4338, 9.5369, 9.4673, 9.3982, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.9060, 10.0061, 10.1058, 10.2050, 10.3038, 10.4021, 10.5000,\n 10.5974, 10.6944, 10.6256, 10.7222, 10.6538, 10.7500, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jane knocked on the door, and Susan answered it. She invited her to come out.\nWith pronoun replaced: Jane invited her to come out.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.4345, 2.6726, 2.9055, 3.1334, 3.0123, 3.2348, 3.1160, 3.0000,\n 2.8868, 3.1027, 2.9913, 2.8823, 2.7757, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.4004, 2.3094,\n 2.5064, 2.4163, 2.3276, 2.2404, 2.1546, 2.0702, 2.2611, 2.4495,\n 2.3651, 2.2819, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 2.1167, 2.0426, 2.2156,\n 2.1420, 2.0692, 1.9973, 1.9262, 1.8559, 1.7865, 1.7178, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.5511, 1.4863, 1.4222, 1.3587,\n 1.5213, 1.6823, 1.6186, 1.5556, 1.4931, 1.4313, 1.3700, 1.5275,\n 1.4664, 1.6222, 1.5613, 1.5010, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.6366, 0.5855, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.3916, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.8108, 5.6667,\n 5.5261, 5.7155, 5.9017, 6.0849, 5.9479, 5.8140, 5.9944, 5.8635,\n 6.0413, 5.9132, 5.7877, 5.6647, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 7.0387, 6.9204, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.1525, 7.3033, 7.4524, 7.6000, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.5067, 8.6418, 8.7758, 8.9086, 9.0401, 8.9324, 8.8260, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.7376,\n 9.8601, 9.7574, 9.6559, 9.7778, 9.8987, 10.0188, 9.9187, 10.0380,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.4083, 10.5236, 10.6380, 10.7517,\n 10.8647, 10.9769, 11.0883, 10.9917, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.2414, 11.1480, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.0096, 11.9187, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.6387, 12.5503,\n 12.6492, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.0493, 13.1453,\n 13.2410, 13.3361, 13.4308, 13.3447, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 14.0096, 13.9262,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.3970, 14.4850, 14.5726, 14.6599, 14.7468, 14.8333,\n 14.9195, 14.8396, 14.9255, 15.0111, 15.0964, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Dan had to stop Bill from toying with the injured bird. He is very cruel.\nWith pronoun replaced: Bill is very cruel.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.9868, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -2.0381, -1.6667, -1.7457, -1.8226, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.9369, -2.0000,\n -2.0620, -2.1229, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.2299, -2.2819, -2.3333, -2.1193, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.6186,\n -2.6613, -2.7037, -2.7457, -2.7875, -2.8289, -2.8701, -2.6992, -2.7406,\n -2.7818, -2.6135, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.8505, -2.8892, -2.9277, -2.9659, -3.0039, -2.8485, -2.8868,\n -2.9247, -2.9625, -3.0000, -3.0373, -3.0744, -3.1113, -3.1479, -3.1844,\n -3.0339, -3.0706, -3.1071, -3.1433, -3.1794, -3.0315, -3.0677, -3.1038,\n -3.1396, -3.1753, -3.2107, -3.2460, -3.1013, -3.1368, -3.1720, -3.0290,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.5286, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142, 1.2702, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.8974, 1.7628, 2.0412, 1.9096, 1.7823, 1.6590, 1.9245,\n 1.8034, 1.6859, 1.9415, 1.8257, 2.0738, 1.9599, 1.8489, 2.0889, 2.3238,\n 2.2133, 2.1054, 2.0000, 2.2269, 2.1229, 2.0211, 1.9215, 2.1412, 2.0428,\n 1.9462, 2.1602, 2.0647, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.4004,\n 2.5981, 2.5064, 2.4163, 2.6098, 2.5205, 2.4327, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.5504, 2.4667, 2.3842, 2.5660, 2.7456, 2.6632, 2.8402, 3.0151,\n 3.1879, 3.1052, 3.0237, 2.9433, 3.1129, 3.2806, 3.2004, 3.1211, 3.0429,\n 3.2077, 3.3708, 3.5322, 3.4538, 3.3764, 3.2998, 3.4586, 3.6159, 3.5396,\n 3.6950, 3.6193, 3.5443, 3.6977, 3.6233, 3.5496, 3.7011, 3.6279, 3.5556,\n 3.4839, 3.6332, 3.5620, 3.4915, 3.6389, 3.7852, 3.9302, 4.0740, 4.0032,\n 3.9331, 3.8636, 4.0056, 3.9365, 3.8680, 4.0085, 3.9404, 4.0795, 4.0119,\n 3.9448, 4.0825, 4.2191, 4.1522, 4.2877, 4.2212, 4.1552, 4.2893, 4.2237,\n 4.1586, 4.2914, 4.2267, 4.1625, 4.0988, 4.2301, 4.1667, 4.1038, 4.2339,\n 4.3631, 4.4915, 4.6190, 4.7458, 4.8717, 4.9969, 4.9333, 5.0576, 4.9943,\n 5.1177, 5.0548, 4.9923, 5.1146, 5.2362, 5.1739, 5.2947, 5.2327, 5.1711,\n 5.2909, 5.2297, 5.1689, 5.2877, 5.2272, 5.1671, 5.1073, 5.2251, 5.1657,\n 5.1066, 5.2235, 5.3398, 5.4554, 5.5705, 5.6849, 5.7987, 5.9120, 5.8525,\n 5.9651, 5.9059, 6.0177, 5.9588, 5.9002, 6.0113, 6.1219, 6.0635, 6.1734,\n 6.1153, 6.0575, 6.1667, 6.1091, 6.0519, 6.1604, 6.1034, 6.0468, 5.9905,\n 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Tom said \"Check\" to Ralph as he took his bishop.\nWith pronoun replaced: Tom said \"Check\" to Ralph as he took Ralph's bishop.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "192", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.67", + "p value": "0.0478", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.1170,\n 1.8257, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321, 2.1004, 1.9052,\n 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142, 1.2702, 1.1323, 1.4444,\n 1.3093, 1.1793, 1.4757, 1.3480, 1.6330, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.4237, 1.3112, 1.2019, 1.0954, 1.3525, 1.2472, 1.4968, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.3333, 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142,\n 1.3234, 1.2344, 1.4530, 1.3646, 1.2778, 1.1926, 1.4045, 1.3198, 1.5275,\n 1.7321, 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.7450, 1.6667, 1.8543, 2.0397, 1.9612, 2.1436, 2.3238, 2.2453,\n 2.1678, 2.3448, 2.2678, 2.1918, 2.1167, 2.0426, 1.9695, 1.8972, 1.8257,\n 1.7552, 1.9262, 2.0954, 2.0247, 1.9548, 2.1213, 2.0517, 1.9829, 1.9149,\n 1.8475, 1.7809, 1.7150, 1.6498, 1.8116, 1.9720, 1.9066, 1.8419, 2.0000,\n 1.9355, 1.8716, 1.8084, 1.7457, 1.6837, 1.6222, 1.7767, 1.7154, 1.6547,\n 1.8071, 1.7465, 1.8974, 1.8370, 1.7772, 1.7179, 1.6591, 1.8074, 1.7488,\n 1.6906, 1.6330, 1.7792, 1.7217, 1.8664, 1.8091, 1.9524, 1.8953, 1.8385,\n 1.9803, 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 1.8411, 1.7864, 1.7321,\n 1.8699, 1.8157, 1.7619, 1.7085, 1.8446, 1.7913, 1.9263, 2.0605, 2.0071,\n 1.9540, 2.0868, 2.0339, 1.9813, 1.9291, 1.8773, 1.8257, 1.7746, 1.9052,\n 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.7817, 1.9097, 2.0369, 1.9868,\n 1.9370, 2.0631, 2.0134, 1.9640, 1.9149, 1.8660, 1.8175, 1.7693, 1.7213,\n 1.6737, 1.6262, 1.7498, 1.8728, 1.8252, 1.7780, 1.8999, 1.8527, 1.8058,\n 1.7592, 1.7128, 1.6667])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.0119, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.5261, 5.7155, 5.5783, 5.7646, 5.9479, 5.8140, 5.9944, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.7269, 6.5991, 6.4738, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.7426, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.4037, 8.2885, 8.4285, 8.3152, 8.4540, 8.5915, 8.7277, 8.6164,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.6813, 9.8064, 9.9304, 10.0535, 9.9469, 10.0692, 10.1905,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.3411, 10.4592, 10.5763, 10.6927,\n 10.8082, 10.9229, 11.0368, 11.1500, 11.2623, 11.1614, 11.0615, 10.9626,\n 11.0746, 11.1860, 11.2966, 11.1991, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.2414, 11.1480, 11.0554, 11.1640, 11.2719, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.6206, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.5632, 11.4762, 11.5797, 11.4935, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.4289, 12.5264, 12.6234, 12.7199, 12.8160, 12.7329, 12.8285,\n 12.9238, 12.8414, 12.9363, 13.0307, 13.1246, 13.2182, 13.3113, 13.2299,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I sallied out for a bit of food, more to pass the time than because I wanted it.\nWith pronoun replaced: I sallied out for a bit of food, more to pass the time than because I wanted food.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774, 0.4201, 0.8165,\n 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142, 1.2702, 1.1323, 1.0000,\n 0.8729, 0.7505, 0.6325, 0.5185, 0.8165, 0.7035, 0.5941, 0.4880, 0.7698,\n 0.6644, 0.5620, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.6963, 0.6025,\n 0.8513, 1.0948, 1.0000, 1.2372, 1.1431, 1.3744, 1.6013, 1.5068, 1.4142,\n 1.3234, 1.5430, 1.7589, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.5275,\n 1.4434, 1.6471, 1.5635, 1.4812, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.5542, 1.4765, 1.6667, 1.8543, 1.7765, 1.9612, 1.8838, 1.8074, 1.7321,\n 1.6577, 1.5843, 1.5119, 1.4403, 1.3697, 1.5475, 1.4771, 1.4076, 1.3389,\n 1.5131, 1.4446, 1.3770, 1.5483, 1.4809, 1.4142, 1.3483, 1.2831, 1.4506,\n 1.3856, 1.5511, 1.7150, 1.6498, 1.8116, 1.7467, 1.9066, 2.0651, 2.0000,\n 1.9355, 1.8716, 2.0276, 2.1822, 2.1182, 2.0548, 1.9920, 2.1442, 2.0817,\n 2.0197, 1.9582, 2.1082, 2.0470, 1.9863, 2.1344, 2.0739, 2.0140, 1.9545,\n 1.8956, 2.0412, 1.9825, 2.1268, 2.2699, 2.2111, 2.3529, 2.2943, 2.2361,\n 2.1783, 2.1210, 2.0642, 2.0078, 1.9518, 1.8962, 2.0349, 1.9795, 1.9245,\n 1.8699, 2.0068, 1.9524, 1.8983, 2.0338, 1.9799, 1.9263, 1.8732, 1.8204,\n 1.9540, 1.9013, 2.0339, 2.1656, 2.1128, 2.2436, 2.1909, 2.3206, 2.4495,\n 2.3967, 2.3443, 2.2923, 2.4198, 2.5466, 2.4944, 2.4426, 2.3912, 2.5166,\n 2.4653, 2.4142, 2.3635, 2.4877, 2.4371, 2.3868, 2.5099, 2.4597, 2.4099,\n 2.3603, 2.3110, 2.4327, 2.3835, 2.5044, 2.6247, 2.5754, 2.6949, 2.6458,\n 2.5969, 2.5483, 2.5000, 2.4520, 2.4042, 2.3567, 2.3094, 2.4269, 2.3798,\n 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.0000, 4.3235, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.8889, 5.6737, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 6.7489, 6.5727,\n 6.4019, 6.5924, 6.7795, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.1756, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 7.8923, 8.0498, 7.9097, 8.0656, 8.2195, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.6948, 8.8389, 8.9815,\n 8.8522, 8.9935, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.2952,\n 9.1735, 9.0536, 9.1890, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.6156, 9.7442, 9.8716, 9.9980, 9.8852, 10.0107, 9.8995,\n 10.0242, 10.1479, 10.2706, 10.1614, 10.2833, 10.1756, 10.2967, 10.4169,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.6793, 10.7955, 10.9109,\n 11.0254, 10.9229, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.2846, 11.1860, 11.0883, 11.1991, 11.3091, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.0096, 12.1125, 12.0218, 12.1244,\n 12.2263, 12.1366, 12.2381, 12.3391, 12.4395, 12.5394, 12.4508, 12.5503,\n 12.6492, 12.7476, 12.6601, 12.7581, 12.6713, 12.7690, 12.8661, 12.9628,\n 12.8769, 12.9732, 12.8881, 12.9840, 13.0795, 12.9952, 13.0903, 13.1849,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.4758, 13.5683, 13.4859, 13.5781,\n 13.4963, 13.5881, 13.6796, 13.7706, 13.8613, 13.7803, 13.6999, 13.6201,\n 13.7106, 13.8007, 13.7215, 13.8113, 13.9007, 13.9897, 14.0784, 14.0000,\n 14.0884, 14.1764, 14.2640, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Joe paid the detective after he received the final report on the case.\nWith pronoun replaced: Joe received the final report on the case.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.3379, -0.1342, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, 0.0592, 0.0000,\n -0.0586, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.0983, -0.1469, -0.1952, -0.0486, 0.0969, 0.2414, 0.1925,\n 0.3356, 0.4778, 0.4286, 0.3797, 0.3311, 0.2828, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.2261, 0.1803, 0.3146, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.3522, 0.4828, 0.4377, 0.3928, 0.3482,\n 0.3038, 0.4327, 0.5610, 0.6885, 0.6437, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.7816, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 1.8034, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.4371, 2.6681, 2.8943, 2.7791, 3.0000,\n 3.2167, 3.1027, 3.3147, 3.2026, 3.4101, 3.2998, 3.1918, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.6833, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.4163, 2.3276, 2.2404, 2.1546, 2.3462, 2.5352, 2.4495,\n 2.6354, 2.8189, 3.0000, 3.1789, 3.0924, 3.0071, 3.1829, 3.3566,\n 3.5283, 3.6979, 3.8657, 3.7796, 3.9452, 3.8600, 4.0234, 4.1851,\n 4.3451, 4.2601, 4.1761, 4.3339, 4.2507, 4.4066, 4.3241, 4.2426,\n 4.1621, 4.0825, 4.0038, 4.1569, 4.3086, 4.4590, 4.6079, 4.7556,\n 4.9019, 4.8226, 4.9675, 4.8889, 5.0323, 5.1745, 5.3156, 5.2372,\n 5.3769, 5.2992, 5.4377, 5.5750, 5.7112, 5.6338, 5.7689, 5.9029,\n 6.0359, 6.1680, 6.2990, 6.2217, 6.3517, 6.2750, 6.4039, 6.5320,\n 6.6591, 6.7854, 6.7090, 6.6332, 6.7585, 6.8828, 7.0063, 7.1291,\n 7.2510, 7.1755, 7.2966, 7.4168, 7.3419, 7.4613, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 8.0546,\n 7.9816, 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.5810,\n 8.5088, 8.6186, 8.7278, 8.6560, 8.7646, 8.6933, 8.8013, 8.7305,\n 8.8379, 8.7676, 8.8744, 8.8045, 8.9107, 8.8413, 8.9469, 9.0520,\n 9.1566, 9.0876, 9.0190, 9.1230, 9.2265, 9.3295, 9.2613, 9.1936,\n 9.2960, 9.3980, 9.3306, 9.2637, 9.1971, 9.2986, 9.3995, 9.5000,\n 9.6000, 9.6996, 9.7987, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply too many of them.\nWith pronoun replaced: There were simply too many copies of the newsletter.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.4436, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.7807, -0.6321, -0.4845, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.4593, -0.5037, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -0.9897, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "76", + "Fraction of T in Greenlist": "38.2%", + "z-score": "4.3", + "p value": "8.64e-06", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.2928, 0.0000, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.9918, 1.2472, 1.1446, 1.0445, 1.2910, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.3744, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 1.1088, 1.0265, 1.2366, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.9604, 2.1546, 2.0702, 1.9870, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 2.2226, 2.1436, 2.3238,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 1.8677, 1.7951, 1.9695,\n 1.8972, 2.0692, 2.2393, 2.4077, 2.3349, 2.5011, 2.6656, 2.8284,\n 2.7552, 2.6828, 2.8433, 3.0022, 2.9299, 2.8583, 2.7875, 2.7175,\n 2.8735, 2.8039, 2.9582, 2.8889, 2.8203, 2.7524, 2.6852, 2.6186,\n 2.5527, 2.4874, 2.6381, 2.5731, 2.7222, 2.8701, 3.0168, 2.9515,\n 3.0967, 3.2408, 3.3838, 3.3182, 3.2533, 3.3947, 3.5350, 3.4701,\n 3.4058, 3.3420, 3.2788, 3.4171, 3.3542, 3.4913, 3.4286, 3.3665,\n 3.3049, 3.2437, 3.1831, 3.1229, 3.0632, 3.1977, 3.1382, 3.2717,\n 3.4042, 3.5359, 3.4762, 3.6068, 3.7366, 3.8655, 3.8057, 3.7463,\n 3.8741, 4.0011, 3.9418, 3.8829, 3.8244, 3.7664, 3.8919, 3.8341,\n 3.9586, 3.9010, 3.8438, 3.7870, 3.7306, 3.6745, 3.6188, 3.5635,\n 3.6862, 3.6310, 3.7528, 3.8740, 3.9945, 3.9392, 4.0589, 4.1779,\n 4.2962, 4.2409, 4.1859, 4.3033, 4.4202, 4.3652, 4.3106, 4.2563,\n 4.2023, 4.3180, 4.2642, 4.3792, 4.3256, 4.2723, 4.2193, 4.1667,\n 4.1143, 4.0622, 4.0105, 4.1239, 4.0723, 4.1851, 4.2974])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.999%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Joe has sold his house and bought a new one a few miles away. He will be moving into it on Thursday.\nWith pronoun replaced: He will be moving into The new house on Thursday.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.0722, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.5922, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.5315, -0.5822, -0.6325,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.7703, -0.6402, -0.6810,\n -0.7216, -0.7620, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.6667,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "67", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "46.3%", + "z-score": "4.02", + "p value": "2.9e-05", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 1.3472,\n 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.9938,\n 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856, 2.1939, 2.4910, 2.7778,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.8577, 3.1156, 2.9704, 2.8301, 2.6943,\n 2.9424, 2.8098, 2.6811, 2.9212, 2.7952, 2.6726, 2.5533, 2.7852, 3.0123,\n 3.2348, 3.4528, 3.6667, 3.5466, 3.7559, 3.9614, 3.8431, 3.7273, 3.9284,\n 3.8146, 3.7033, 3.5942, 3.4873, 3.3824, 3.2796, 3.4743, 3.6662, 3.8552,\n 4.0415, 3.9386, 3.8376, 4.0205])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "99.997%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.\nWith pronoun replaced: He could not help Dad now.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.0565, -0.9218, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.9313, -0.8022, -0.6737, -0.7139, -0.7539, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.6202, -0.4949, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "97", + "Fraction of T in Greenlist": "49.0%", + "z-score": "7.8", + "p value": "3.2e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.5590, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.1027, 3.3147, 3.5228, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 4.0024, 4.1797, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.6476,\n 4.5547, 4.4630, 4.6262, 4.7875, 4.6967, 4.8561, 4.7662, 4.6775,\n 4.5899, 4.7469, 4.9023, 5.0562, 4.9691, 5.1212, 5.2719, 5.4212,\n 5.3345, 5.2489, 5.1643, 5.0807, 4.9980, 5.1450, 5.2906, 5.4349,\n 5.3526, 5.2713, 5.1908, 5.3333, 5.4747, 5.6149, 5.5348, 5.6737,\n 5.8114, 5.9481, 5.8684, 5.7894, 5.7112, 5.6338, 5.5572, 5.6921,\n 5.8260, 5.9589, 6.0908, 6.0143, 6.1451, 6.0693, 6.1990, 6.1237,\n 6.2524, 6.3803, 6.3054, 6.4322, 6.3580, 6.2843, 6.4101, 6.3369,\n 6.2644, 6.3892, 6.3172, 6.2458, 6.1750, 6.1047, 6.0351, 5.9660,\n 5.8974, 5.8294, 5.9524, 5.8848, 6.0069, 6.1283, 6.0609, 6.1815,\n 6.3013, 6.4203, 6.3532, 6.4715, 6.5891, 6.7060, 6.6391, 6.7552,\n 6.8707, 6.9856, 6.9189, 6.8527, 6.7869, 6.7217, 6.6568, 6.7706,\n 6.8838, 6.9964, 6.9317, 6.8675, 6.8037, 6.9155, 7.0266, 7.1372,\n 7.0736, 7.1835, 7.2929, 7.4017, 7.3383, 7.4465, 7.5542, 7.6613,\n 7.5981, 7.5353, 7.4729, 7.4109, 7.3493, 7.4556, 7.5614, 7.6667,\n 7.7715, 7.7099, 7.8142, 7.7530, 7.8567, 7.7958])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The table was piled high with food, and on the floor beside it there were crocks, baskets, and a five-quart pail of milk.\nWith pronoun replaced: Beside the table there were crocks, baskets, and a five-quart pail of milk.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.7744, -2.8141, -2.6533, -2.6933, -2.5342, -2.5744,\n -2.6143, -2.6540, -2.4975, -2.5373, -2.5769, -2.6163, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.6960, -2.7341, -2.7721, -2.6224,\n -2.4738, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.4641, -3.4964, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.2603, -0.8165, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 1.2019, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.9795, 2.2133, 2.4422, 2.6667,\n 2.8868, 2.7761, 2.9913, 2.8823, 2.7757, 2.9856, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.3824, 3.5777, 3.7700, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.3026, 4.2008, 4.1008, 4.2784, 4.4537, 4.3546,\n 4.2571, 4.4296, 4.3333, 4.5034, 4.4083, 4.5760, 4.4820, 4.6476,\n 4.5547, 4.4630, 4.6262, 4.5356, 4.4462, 4.3580, 4.2710, 4.1851,\n 4.1003, 4.0166, 4.1761, 4.3339, 4.2507, 4.4066, 4.5611, 4.4783,\n 4.6311, 4.5491, 4.4680, 4.6188, 4.7682, 4.6876, 4.6079, 4.7556,\n 4.9019, 5.0469, 4.9675, 4.8889, 5.0323, 4.9543, 4.8772, 5.0190,\n 5.1597, 5.0829, 5.2223, 5.3606, 5.4977, 5.6338, 5.5572, 5.6921,\n 5.6160, 5.5407, 5.6743, 5.8069, 5.9386, 5.8635, 5.9941, 5.9196,\n 5.8458, 5.9752, 6.1036, 6.2312, 6.3580, 6.4838, 6.6088, 6.7330,\n 6.6591, 6.7823, 6.7089, 6.6361, 6.7584, 6.8799, 6.8075, 6.7358,\n 6.8563, 6.9762, 7.0952, 7.2136, 7.3312, 7.4482, 7.5644, 7.4927,\n 7.6082, 7.5369, 7.6517, 7.7658, 7.8793, 7.9921, 7.9211, 8.0333,\n 8.1448, 8.0742, 8.0042, 8.1150, 8.2252, 8.3349, 8.4439, 8.3742,\n 8.3050, 8.4133, 8.5212, 8.6284, 8.7351, 8.8413, 8.9469, 9.0520,\n 8.9830, 9.0876, 9.0190, 8.9509, 9.0549, 9.1584, 9.0906, 9.0233,\n 9.1262, 9.2287, 9.3306, 9.4321, 9.5331, 9.6336, 9.7337, 9.6667,\n 9.7663, 9.6996, 9.7987, 9.8974, 9.8311, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jim comforted Kevin because he was so upset.\nWith pronoun replaced: Kevin was so upset.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "0", + "Fraction of T in Greenlist": "0.0%", + "z-score": "-1.73", + "p value": "0.958", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.4237, 1.6859, 1.9415, 1.8257,\n 1.7132, 1.9599, 2.2011, 2.0889, 1.9795, 1.8728, 1.7685, 2.0000,\n 2.2269, 2.4495, 2.6679, 2.8823, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371, 4.6188,\n 4.5115, 4.4061, 4.5847, 4.4809, 4.6568, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 5.8377, 5.9932, 5.8936, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.7931, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.0000, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.6357, 8.7600, 8.6679, 8.7913, 8.9138, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.6484, 9.5620, 9.6758, 9.5902, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.5238, 10.4407, 10.5475, 10.4652, 10.5714, 10.6771,\n 10.7822, 10.8867, 10.9906, 11.0940, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.4209, 11.3402, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.4880, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.7082, 12.6323, 12.7248, 12.6494, 12.7416, 12.8333,\n 12.9247, 13.0157, 13.1063, 13.1966, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jackson was greatly influenced by Arnold, though he lived two centuries earlier.\nWith pronoun replaced: Arnold lived two centuries earlier.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.2462,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, 0.0000, -0.0574, -0.1143, 0.0569, 0.2265,\n 0.3944, 0.3365, 0.2791, 0.2222, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.2010, 0.1502, 0.2993, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.5855, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.4944, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.7878, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.7336, 0.6885, 0.8154, 0.7703, 0.7255, 0.6810,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.6299, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.5375, 0.6598, 0.7816, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.8497, 4.1111, 3.9279, 3.7524, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.5260, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 4.8712, 4.7488, 4.9377,\n 5.1236, 5.0034, 4.8857, 5.0684, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.8789, 5.7719, 5.6667, 5.8279, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.6986, 5.8522, 6.0041, 5.9084,\n 5.8139, 5.9641, 6.1128, 6.0193, 5.9270, 6.0740, 6.2197, 6.1283,\n 6.0380, 6.1820, 6.3248, 6.2354, 6.3768, 6.5169, 6.4283, 6.5672,\n 6.7049, 6.6171, 6.7536, 6.8889, 6.8019, 6.7159, 6.8500, 6.9830,\n 7.1149, 7.0296, 6.9451, 7.0759, 6.9923, 6.9094, 6.8274, 6.9570,\n 7.0857, 7.2134, 7.1319, 7.2587, 7.3845, 7.3037, 7.2236, 7.3485,\n 7.4724, 7.5955, 7.5161, 7.4373, 7.5595, 7.4813, 7.4039, 7.3271,\n 7.4483, 7.5687, 7.6883, 7.6120, 7.7308, 7.8489, 7.7732, 7.6980,\n 7.8153, 7.9318, 8.0476, 7.9729, 7.8988, 8.0139, 7.9403, 7.8673,\n 7.7949, 7.7230, 7.8372, 7.7658, 7.6950, 7.6246, 7.7380, 7.6681,\n 7.5988, 7.7114, 7.8233, 7.7544, 7.6859, 7.6179, 7.7291, 7.8397,\n 7.9497, 8.0591, 7.9913, 7.9241, 7.8572, 7.9659, 8.0741, 8.1817,\n 8.2887, 8.2221, 8.1560, 8.0902, 8.1966, 8.3024, 8.4078, 8.5126,\n 8.4471, 8.3820, 8.3173, 8.2531, 8.1892, 8.2933, 8.2298, 8.3333,\n 8.2702, 8.2074, 8.1449, 8.2479, 8.3503, 8.4523, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: There is a pillar between me and the stage, and I can't see around it.\nWith pronoun replaced: I can't see around the stage.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -1.0659, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "50.0%", + "z-score": "2.31", + "p value": "0.0105", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Archaeologists have concluded that humans lived in Laputa 20,000 years ago. They hunted for deer on the river banks.\nWith pronoun replaced: Prehistoric humans hunted for deer on the river banks.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.4%", + "z-score": "0.452", + "p value": "0.325", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, -0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.1245, -0.1857, 0.0000,\n -0.0612, -0.1217, 0.0605, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.1063, -0.1588, 0.0000,\n -0.0525, -0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.1410, -0.1873,\n -0.2334, -0.2791, -0.1391, -0.1849, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.6199, -0.6623, -0.5283, -0.3951, -0.4377, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.2146, -0.2568, -0.2988, -0.1703,\n -0.0424, -0.0847, -0.1267, 0.0000, -0.0420, 0.0838, 0.0418, 0.1667,\n 0.2909, 0.4145, 0.5375, 0.4949, 0.4525])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.1547,\n -1.2603, -0.8165, -0.9272, -0.5164, -0.1260, 0.2462, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 4.0000,\n 3.8765, 4.0825, 3.9614, 3.8431, 3.7273, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.6571, 4.5461, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.0410, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 6.8931,\n 6.7931, 6.9378, 6.8391, 6.9824, 6.8849, 6.7886, 6.6935, 6.8354,\n 6.9759, 6.8819, 7.0211, 7.1591, 7.2960, 7.2029, 7.3386, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.5967, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.7439, 8.8631, 8.9815,\n 9.0991, 9.0134, 8.9285, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.9249, 10.0342, 9.9524, 9.8712, 9.9800, 10.0881, 10.0076, 9.9278,\n 9.8486, 9.9562, 9.8776, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 9.9648, 9.8887, 9.8131, 9.7380, 9.8433, 9.7688, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.3435, 10.4447,\n 10.5453, 10.6455, 10.7451, 10.8444, 10.7714, 10.6990, 10.7978, 10.8961,\n 10.8241, 10.9220, 11.0194, 10.9480, 11.0450, 11.1415, 11.0705, 11.1667,\n 11.2624, 11.1919, 11.2872, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam and Amy are passionately in love, but Amy's parents are unhappy about it, because they are snobs.\nWith pronoun replaced: Amy's parents are snobs.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "30", + "Fraction of T in Greenlist": "15.1%", + "z-score": "-3.23", + "p value": "0.999", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.4351,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -2.8887, -2.9241, -2.9593,\n -2.9943, -3.0292, -3.0639, -3.0984, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.0987, -3.1327, -3.1665, -3.2002, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.2004, -3.2333])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "60.0%", + "z-score": "7.45", + "p value": "4.59e-14", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 1.1793, 1.0541, 1.3480, 1.6330,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.5627, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.6098, 3.4816, 3.7009, 3.9158, 4.1265, 4.0000,\n 4.2064, 4.4091, 4.6082, 4.4836, 4.6790, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 6.8205, 6.9714,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bill passed the half-empty plate to John because he was hungry.\nWith pronoun replaced: John was hungry.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.3817, -1.4382, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.4967, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.4313, -1.4796, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.2173, -1.2649,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.0105, -1.0531, -1.0954,\n -1.1375, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.0974, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -1.1279, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.7852, 2.6681, 2.5538, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.8823, 3.0929, 2.9856, 3.1918, 3.3947,\n 3.5942, 3.7905, 3.9837, 3.8759, 4.0657, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.4061, 4.3026, 4.4809, 4.6568, 4.5544, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.6715, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.3867, 5.5435, 5.4482, 5.6032, 5.5090, 5.6622,\n 5.8139, 5.9641, 5.8707, 5.7785, 5.6875, 5.8358, 5.9827, 5.8926,\n 6.0380, 6.1820, 6.3248, 6.4663, 6.6066, 6.5169, 6.6559, 6.5672,\n 6.7049, 6.8414, 6.9768, 6.8889, 7.0231, 7.1563, 7.2884, 7.2012,\n 7.3322, 7.4622, 7.5912, 7.5048, 7.4193, 7.5472, 7.6742, 7.5895,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.5910, 7.7152, 7.8384, 7.7567,\n 7.8791, 8.0006, 8.1214, 8.0403, 7.9600, 8.0798, 8.1989, 8.1192,\n 8.0402, 8.1585, 8.2760, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.5424, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 9.0257, 8.9502, 9.0601, 8.9851, 9.0944, 9.2032, 9.3113,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.3708, 9.2990, 9.4042, 9.5089, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.8821, 9.9837, 9.9132, 10.0143, 10.1149, 10.2151,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.8184, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sara borrowed the book from the library because she needs it for an article she is working on. She writes it when she gets home from work.\nWith pronoun replaced: She writes the book when she gets home from work.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.9245, 1.8034, 2.0605, 2.3113, 2.1909,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.4495, 2.3445, 2.5621, 2.7757, 2.6713, 2.8804, 3.0861,\n 2.9824, 2.8808, 2.7811, 2.9814, 2.8830, 3.0796, 2.9823, 3.1754,\n 3.0793, 3.2691, 3.4562, 3.3607, 3.5447, 3.7264, 3.6315, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.7087, 3.6187, 3.7916, 3.7025, 3.8730,\n 4.0415, 3.9530, 4.1192, 4.2836, 4.1957, 4.1090, 4.0234, 4.1851,\n 4.1003, 4.2601, 4.1761, 4.3339, 4.2507, 4.4066, 4.5611, 4.4783,\n 4.6311, 4.7823, 4.7001, 4.6188, 4.5384, 4.6876, 4.6079, 4.5291,\n 4.4511, 4.5983, 4.5210, 4.6667, 4.8111, 4.7341, 4.8772, 5.0190,\n 4.9424, 4.8666, 4.7916, 4.9317, 4.8572, 4.7834, 4.7104, 4.8488,\n 4.7763, 4.9135, 5.0496, 4.9774, 5.1123, 5.2463, 5.1744, 5.1031,\n 5.0325, 5.1650, 5.0948, 5.0252, 4.9562, 5.0873, 5.0187, 5.1488,\n 5.2779, 5.2096, 5.3378, 5.4650, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.3220, 5.2560, 5.1905, 5.3153, 5.2501, 5.3740, 5.4971, 5.4322,\n 5.5544, 5.6760, 5.6112, 5.5470, 5.4832, 5.6036, 5.5402, 5.4772,\n 5.4147, 5.5340, 5.4718, 5.5904, 5.7082, 5.6462, 5.7633, 5.8797,\n 5.8179, 5.7565, 5.6955, 5.8110, 5.7503, 5.6899, 5.6300, 5.7446,\n 5.6849, 5.7987, 5.9120, 5.8525, 5.9651, 6.0770, 6.0177, 5.9588,\n 5.9002, 6.0113, 5.9530, 5.8951, 5.8375, 5.9477, 5.8904, 6.0000,\n 6.1091, 6.0519, 6.1604, 6.2684, 6.2113, 6.1546, 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.4958, 5.3072,\n 5.1257, 4.9507, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 4.8107, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.0623, 4.9316, 4.8038, 4.9962, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.8812, 5.7689, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.7242, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.9932, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.3638, 7.2650, 7.4044, 7.5425,\n 7.4449, 7.5818, 7.4853, 7.3901, 7.2960, 7.4316, 7.3386, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.2443, 7.3765, 7.5076, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.6436,\n 8.7652, 8.8860, 8.7978, 8.7104, 8.8304, 8.9496, 8.8631, 8.9815,\n 8.8958, 8.8108, 8.7267, 8.8443, 8.7610, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.7482, 8.6677, 8.7831, 8.8978, 9.0117, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.6814, 9.7908, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 10.0631, 10.1695, 10.0910, 10.1968, 10.1189, 10.0416,\n 9.9648, 10.0701, 9.9940, 9.9184, 10.0231, 9.9481, 10.0523, 9.9778,\n 9.9038, 10.0074, 10.1106, 10.2132, 10.3154, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.8186, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.1883, 11.2848, 11.2129, 11.1415, 11.0705, 11.1667,\n 11.0961, 11.0261, 11.1218, 11.0521, 11.1475, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: John promised Bill to leave, so an hour later he left.\nWith pronoun replaced: Bill left.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142, 1.2702, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.8974, 1.7628, 2.0412, 1.9096, 1.7823, 2.0494, 2.3094,\n 2.1831, 2.0605, 1.9415, 1.8257, 1.7132, 1.6036, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.3333, 1.5671, 1.7963, 2.0211, 2.2418, 2.1412, 2.0428,\n 1.9462, 2.1602, 2.0647, 1.9711, 2.1798, 2.3851, 2.2916, 2.1997, 2.1094,\n 2.0207, 1.9335, 1.8477, 2.0455, 2.2404, 2.4327, 2.3462, 2.5352, 2.4495,\n 2.3651, 2.5504, 2.4667, 2.3842, 2.5660, 2.4841, 2.4034, 2.3238, 2.5019,\n 2.4228, 2.3448, 2.5198, 2.6928, 2.6148, 2.7854, 2.7080, 2.6316, 2.7995,\n 2.9656, 2.8893, 2.8138, 2.7393, 2.6656, 2.5927, 2.5207, 2.6828, 2.6112,\n 2.5403, 2.4703, 2.4010, 2.3324, 2.4910, 2.6481, 2.8039, 2.9582, 2.8889,\n 2.8203, 2.7524, 2.9044, 2.8368, 2.7699, 2.9200, 3.0688, 3.0019, 2.9357,\n 2.8701, 2.8051, 2.7406, 2.6768, 2.8226, 2.9673, 3.1109, 3.0467, 3.1889,\n 3.1251, 3.0619, 3.2025, 3.1395, 3.0770, 3.2161, 3.1539, 3.0923, 3.0311,\n 3.1685, 3.1076, 3.0471, 3.1831, 3.3181, 3.2577, 3.3915, 3.3314, 3.2717,\n 3.4042, 3.5359, 3.4762, 3.4170, 3.3582, 3.2998, 3.2419, 3.1844, 3.3140,\n 3.2567, 3.1998, 3.1433, 3.0872, 3.0315, 3.1593, 3.2863, 3.4126, 3.5382,\n 3.4821, 3.4263, 3.3710, 3.4953, 3.4401, 3.3853, 3.5085, 3.6310, 3.5762,\n 3.5218, 3.4677, 3.4140, 3.3606, 3.3075, 3.4283, 3.5485, 3.6680, 3.6148,\n 3.7335, 3.6805, 3.6277, 3.7455, 3.6929, 3.6407, 3.7576, 3.7055, 3.6537,\n 3.6021, 3.7180, 3.6667, 3.6156, 3.7306, 3.8451, 3.7940, 3.9078, 3.8569,\n 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284, 2.6558, 2.4910, 2.3333,\n 2.6186, 2.4659, 2.3190, 2.5924, 2.8577, 2.7136, 2.9704, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.0509, 2.9212, 2.7952, 2.6726, 2.5533, 2.4371, 2.6681,\n 2.8943, 3.1160, 3.0000, 3.2167, 3.1027, 2.9913, 3.2026, 3.4101, 3.6141,\n 3.8146, 3.7033, 3.9001, 3.7905, 3.6831, 3.5777, 3.7700, 3.6662, 3.5642,\n 3.7528, 3.9386, 3.8376, 4.0205, 3.9208, 3.8228, 4.0024, 4.1797, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.6187, 3.7916, 3.9624, 4.1312, 4.0415,\n 4.2080, 4.1192, 4.0316, 4.1957, 4.3580, 4.5186, 4.6775, 4.5899, 4.7469,\n 4.6603, 4.5747, 4.4901, 4.6448, 4.5611, 4.4783, 4.6311, 4.7823, 4.7001,\n 4.8497, 4.7682, 4.6876, 4.8355, 4.9820, 4.9019, 4.8226, 4.7442, 4.6667,\n 4.5899, 4.5140, 4.6580, 4.8008, 4.9424, 4.8666, 5.0070, 4.9317, 4.8572,\n 4.9960, 5.1338, 5.2705, 5.4061, 5.3316, 5.4661, 5.3921, 5.3189, 5.2463,\n 5.3793, 5.3072, 5.2358, 5.3675, 5.4983, 5.4272, 5.5570, 5.4863, 5.4163,\n 5.5448, 5.6725, 5.6028, 5.5336, 5.4650, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.5138, 5.6383, 5.5714, 5.6949, 5.6285, 5.5626, 5.6851, 5.8068, 5.9279,\n 6.0481, 5.9822, 6.1017, 6.0362, 5.9711, 5.9065, 6.0249, 5.9607, 5.8969,\n 6.0145, 6.1314, 6.0678, 6.1839, 6.1207, 6.0579, 6.1732, 6.2879, 6.2253,\n 6.1632, 6.1014, 6.0401, 5.9792, 5.9186, 6.0321, 6.1449, 6.2572, 6.1968,\n 6.3084, 6.2482, 6.1884, 6.2993, 6.4096, 6.5193, 6.6285, 6.5688, 6.6774,\n 6.6179, 6.5588, 6.5000, 6.6078, 6.5493, 6.4912, 6.5983, 6.7049, 6.6469,\n 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sara borrowed the book from the library because she needs it for an article she is working on. She reads it when she gets home from work.\nWith pronoun replaced: She reads the article when she gets home from work.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.8165,\n 1.1055, 0.9901, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.7332, 0.6547, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 1.0523, 0.9847,\n 1.1628, 1.3389, 1.2710, 1.4446, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.2577, 1.1946, 1.3587,\n 1.2959, 1.4580, 1.6186, 1.5556, 1.7143, 1.6514, 1.5892, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.5010, 1.4412, 1.3819, 1.5348, 1.4757,\n 1.6271, 1.5681, 1.7179, 1.6591, 1.6008, 1.7488, 1.6906, 1.6330,\n 1.5758, 1.5191, 1.4629, 1.4071, 1.5519, 1.4963, 1.4410, 1.5842,\n 1.5291, 1.6710, 1.6160, 1.7566, 1.7018, 1.6473, 1.5933, 1.5396,\n 1.6781, 1.6246, 1.5714, 1.7085, 1.8446, 1.9799, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.9013, 1.8490, 1.7970, 1.9291, 1.8773, 2.0083,\n 1.9566, 2.0866, 2.0350, 2.1640, 2.2923, 2.2406, 2.1892, 2.1381,\n 2.0873, 2.0369, 1.9868, 2.1131, 2.0631, 2.0134, 2.1385, 2.0889,\n 2.2132, 2.1637, 2.2871, 2.2377, 2.3603, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.1896, 2.3098, 2.2618, 2.2141, 2.3333,\n 2.2857, 2.4042, 2.3567, 2.4744, 2.5915, 2.7080, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.8301, 3.0792, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 2.6726, 2.5533, 2.7852, 3.0123, 3.2348, 3.4528, 3.6667,\n 3.5466, 3.4293, 3.3147, 3.5228, 3.7273, 3.6141, 3.5032, 3.3947,\n 3.2883, 3.1840, 3.0817, 2.9814, 3.1787, 3.0796, 2.9823, 3.1754,\n 3.3657, 3.2691, 3.4562, 3.3607, 3.5447, 3.4503, 3.3574, 3.2660,\n 3.1760, 3.0873, 3.2667, 3.4438, 3.3556, 3.5301, 3.7025, 3.8730,\n 3.7849, 3.9530, 4.1192, 4.2836, 4.1957, 4.1090, 4.2710, 4.1851,\n 4.3451, 4.5035, 4.6603, 4.5747, 4.7296, 4.8830, 5.0350, 4.9497,\n 4.8655, 4.7823, 4.9322, 5.0807, 4.9980, 5.1450, 5.2906, 5.4349,\n 5.5780, 5.7199, 5.8605, 6.0000, 5.9171, 6.0553, 6.1924, 6.3283,\n 6.4632, 6.5970, 6.7298, 6.6471, 6.7788, 6.9094, 6.8274, 6.9570,\n 7.0857, 7.2134, 7.1319, 7.2587, 7.3845, 7.5094, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.6383, 7.7597, 7.8803, 7.8014, 7.9212,\n 8.0402, 8.1585, 8.2760, 8.3927, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 9.1452, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.8293, 9.9357, 9.8590,\n 9.9648, 10.0701, 10.1749, 10.2790, 10.3827, 10.3065, 10.4097, 10.5123,\n 10.4367, 10.5388, 10.6404, 10.7415, 10.6665, 10.7671, 10.8673, 10.9669,\n 10.8925, 10.8186, 10.9178, 10.8444, 10.9431, 11.0414, 11.1392, 11.0663,\n 11.1637, 11.2607, 11.3572, 11.4533, 11.3809, 11.4766, 11.5718, 11.5000,\n 11.5948, 11.6893, 11.7833, 11.7120, 11.8056, 11.8988, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mark heard Steve's feet going down the ladder. The door of the shop closed after him. He ran to look out the window.\nWith pronoun replaced: The door of the shop closed after Steve.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.3%", + "z-score": "-2.18", + "p value": "0.985", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.3696, -0.4399, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -0.8520, -0.6658, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.6045, -1.6473, -1.6898, -1.5396,\n -1.3904, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.7237, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -2.0656, -2.1028, -2.1398, -2.1766, -2.2133,\n -2.2497, -2.2860, -2.1532, -2.1896, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.1086, -2.1444, -2.1801])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774, 0.4201, 0.8165,\n 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142, 1.7321, 2.0381, 1.8889,\n 1.7457, 1.6082, 1.8974, 1.7628, 2.0412, 2.3116, 2.1783, 2.4398, 2.6943,\n 2.9424, 2.8098, 3.0509, 3.2863, 3.5165, 3.7417, 3.9620, 4.1779, 4.0451,\n 4.2563, 4.4634, 4.6667, 4.5363, 4.4091, 4.2848, 4.4836, 4.3618, 4.2426,\n 4.1260, 4.0119, 4.2060, 4.0937, 3.9837, 3.8759, 3.7700, 3.6662, 3.8552,\n 4.0415, 4.2251, 4.1219, 4.3026, 4.4809, 4.6568, 4.5544, 4.4537, 4.3546,\n 4.5274, 4.4296, 4.6000, 4.7683, 4.9346, 4.8375, 4.7419, 4.6476, 4.8113,\n 4.9731, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 4.9237, 5.0795, 5.2338,\n 5.3865, 5.2970, 5.2086, 5.3594, 5.5088, 5.6569, 5.5690, 5.7155, 5.8606,\n 6.0044, 5.9172, 5.8310, 5.7457, 5.8878, 5.8034, 5.9442, 6.0838, 6.2222,\n 6.1383, 6.2755, 6.4116, 6.5465, 6.4632, 6.3807, 6.2991, 6.4327, 6.3517,\n 6.4842, 6.6157, 6.7462, 6.6658, 6.5861, 6.7155, 6.8439, 6.9714, 6.8922,\n 7.0187, 7.1443, 7.2691, 7.1904, 7.1125, 7.0353, 7.1590, 7.0823, 7.2051,\n 7.3271, 7.4483, 7.3721, 7.4924, 7.6120, 7.7308, 7.6551, 7.5800, 7.5056,\n 7.6235, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.7524, 7.6800, 7.7949,\n 7.7230, 7.8372, 7.7658, 7.6950, 7.6246, 7.7380, 7.8507, 7.7808, 7.7114,\n 7.8233, 7.9347, 7.8657, 7.7971, 7.9078, 8.0178, 8.1273, 8.0591, 7.9913,\n 8.1001, 8.0328, 8.1410, 8.2486, 8.3557, 8.4623, 8.5683, 8.6738, 8.7788,\n 8.8832, 8.9872, 8.9199, 8.8531, 8.9565, 9.0593, 9.1617, 9.2637, 9.1971,\n 9.2986, 9.2324, 9.3333, 9.2676, 9.2022, 9.1372, 9.2376, 9.3375, 9.2729,\n 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old.\nWith pronoun replaced: When Fred first saw my father, My father was twelve years old.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.1777, 0.0000,\n -0.0586, -0.1166, 0.0580, 0.0000, 0.1723, 0.1143, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.7971, -0.6623, -0.7044, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.9415, -0.8109, -0.6810,\n -0.7216, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.6623, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.2426, 4.4374, 4.6291,\n 4.8177, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.6679, 6.8205, 6.9714,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.2816, 8.4138, 8.5448, 8.6747, 8.5749, 8.7039, 8.8318,\n 8.9586, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.6732, 9.7912, 9.9085,\n 10.0249, 9.9315, 10.0472, 10.1621, 10.2763, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 11.8172, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.6643, 12.7597, 12.8546, 12.9491, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.0688, 13.1617, 13.2542, 13.3463, 13.2668, 13.3585, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.6546, 13.7442, 13.8333,\n 13.9221, 13.8447, 13.9332, 14.0214, 14.1091, 14.0324, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Tatyana knew that Grandma always enjoyed serving an abundance of food to her guests. Now Tatyana watched as Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled her to the table, lifting her shawl from her shoulders, seating her in the place of honor, and saying simply: \"There's plenty.\"\nWith pronoun replaced: Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled Grandma to the table.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.9%", + "z-score": "0.946", + "p value": "0.172", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, 0.2379, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.0586, -0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, 0.0000,\n 0.1629, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.2522, 0.4020, 0.3504, 0.2993, 0.4472, 0.3961,\n 0.3453, 0.4915, 0.4407, 0.3904, 0.5348, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.6689, 0.8095, 0.7593, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.8165, 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.8245, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.8963, 0.8513,\n 0.8065, 0.9313, 0.8866, 0.8422, 0.9659, 0.9215, 0.8773, 1.0000,\n 1.1221, 1.0777, 1.0336, 0.9897, 0.9461])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "129", + "Fraction of T in Greenlist": "64.8%", + "z-score": "13", + "p value": "8.6e-39", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.0037, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.8797, 6.0751, 6.2668, 6.1107, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.6953, 6.5485, 6.4051, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.5615, 7.4370, 7.3147, 7.4686, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.4285, 8.5672, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.8007, 8.9324, 9.0629, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.2376, 9.3641, 9.2609, 9.1590, 9.0582,\n 9.1840, 9.0845, 9.2094, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 10.0779, 9.9813, 10.0984, 10.0029, 9.9085,\n 9.8150, 9.9315, 9.8389, 9.9547, 10.0698, 10.1840, 10.2975, 10.4103,\n 10.5224, 10.4312, 10.5427, 10.6534, 10.7635, 10.6733, 10.7828, 10.6936,\n 10.6052, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.2001, 11.3043, 11.4080, 11.3232, 11.4263,\n 11.3423, 11.2589, 11.1761, 11.2789, 11.1968, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.7031, 11.8028, 11.7217, 11.8210, 11.9197, 12.0180, 11.9377,\n 12.0355, 11.9558, 11.8766, 11.7980, 11.8956, 11.8176, 11.9147, 12.0114,\n 12.1076, 12.2033, 12.2987, 12.3935, 12.3163, 12.4109, 12.5049, 12.5986,\n 12.5221, 12.6153, 12.5394, 12.4638, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.9574, 12.8836, 12.9739])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred watched TV while George went out to buy groceries. After an hour he got back.\nWith pronoun replaced: George got back.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "71", + "Fraction of T in Greenlist": "35.7%", + "z-score": "3.48", + "p value": "0.000252", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.6353, 1.5323, 1.4317, 1.6667,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.7002, 1.9064, 1.8185, 2.0207,\n 1.9335, 1.8477, 2.0455, 1.9604, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.6667, 1.5894, 1.7765, 1.9612, 1.8838, 1.8074,\n 1.9887, 1.9127, 1.8378, 1.7638, 1.9413, 1.8677, 2.0426, 2.2156,\n 2.1420, 2.0692, 2.2393, 2.4077, 2.3349, 2.5011, 2.4286, 2.3570,\n 2.5207, 2.4495, 2.3791, 2.3094, 2.2405, 2.1723, 2.1049, 2.2646,\n 2.1974, 2.1309, 2.2884, 2.4444, 2.3779, 2.3120, 2.4660, 2.4004,\n 2.3354, 2.2711, 2.4227, 2.3586, 2.5087, 2.6575, 2.5934, 2.5298,\n 2.6768, 2.8226, 2.7591, 2.9035, 2.8402, 2.7775, 2.9202, 2.8577,\n 2.7958, 2.7344, 2.6735, 2.6131, 2.5532, 2.6933, 2.6336, 2.5744,\n 2.7129, 2.8505, 2.7913, 2.7325, 2.8687, 2.8101, 2.7520, 2.6943,\n 2.8288, 2.7713, 2.9048, 3.0373, 2.9798, 2.9227, 3.0540, 3.1844,\n 3.1273, 3.2567, 3.1998, 3.1433, 3.2715, 3.2152, 3.1593, 3.1038,\n 3.0486, 2.9938, 2.9394, 3.0657, 3.0114, 2.9575, 3.0827, 3.2071,\n 3.1532, 3.0997, 3.2230, 3.1696, 3.1166, 3.0638, 3.1860, 3.1334,\n 3.2547, 3.3754, 3.3228, 3.2705, 3.3902, 3.5093, 3.4570, 3.5753,\n 3.5232, 3.4713, 3.5887, 3.5370, 3.4857, 3.4346, 3.3838, 3.3333,\n 3.2831, 3.3990, 3.3489, 3.2991, 3.4142, 3.5286, 3.4788])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "48.0%", + "z-score": "7.47", + "p value": "4.09e-14", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, 0.0000,\n 0.3464, 0.6794, 1.0000, 0.8729, 1.1793, 1.4757, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.7952, 3.0290, 2.9055, 2.7852, 3.0123, 2.8943, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.9913, 3.2026, 3.4101, 3.2998, 3.1918, 3.3947,\n 3.2883, 3.1840, 3.3824, 3.5777, 3.4743, 3.6662, 3.8552, 4.0415,\n 4.2251, 4.1219, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.1612, 4.0667, 4.2385, 4.4083, 4.5760, 4.7419, 4.9058,\n 4.8113, 4.7181, 4.8797, 4.7875, 4.6967, 4.6070, 4.5186, 4.6775,\n 4.5899, 4.7469, 4.9023, 4.8154, 4.9691, 4.8830, 5.0350, 5.1855,\n 5.1000, 5.2489, 5.1643, 5.0807, 4.9980, 4.9163, 5.0630, 5.2085,\n 5.1273, 5.0469, 4.9675, 4.8889, 4.8111, 4.9543, 4.8772, 5.0190,\n 4.9424, 5.0829, 5.0070, 4.9317, 4.8572, 4.7834, 4.9221, 5.0596,\n 4.9862, 4.9135, 4.8414, 4.9774, 5.1123, 5.2463, 5.3793, 5.3072,\n 5.4391, 5.5701, 5.7001, 5.6282, 5.5570, 5.6858, 5.8138, 5.7429,\n 5.8698, 5.9960, 6.1213, 6.0506, 6.1750, 6.2985, 6.4213, 6.3509,\n 6.4728, 6.4028, 6.3333, 6.4543, 6.3853, 6.3168, 6.4368, 6.5561,\n 6.4880, 6.4203, 6.5387, 6.4715, 6.4048, 6.3385, 6.2728, 6.2075,\n 6.1427, 6.2598, 6.3762, 6.3117, 6.4274, 6.3632, 6.2994, 6.2361,\n 6.1732, 6.1107, 6.0487, 5.9871, 6.1014, 6.2152, 6.1537, 6.2668,\n 6.3793, 6.4911, 6.4298, 6.3689, 6.4800, 6.5906, 6.5299, 6.4695,\n 6.5794, 6.6887, 6.6285, 6.7372, 6.8454, 6.9530, 6.8930, 7.0000,\n 7.1065, 7.2125, 7.1527, 7.2581, 7.3631, 7.4676])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Lily spoke to Donna, breaking her concentration.\nWith pronoun replaced: Lily spoke to Donna, breaking Donna's concentration.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -0.9766, -1.0290, -1.0809, -1.1323,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.4517, -1.2910, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.5206, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.1237, -1.1651, -1.2063, -1.2472,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.5916, -1.4621, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "64.4%", + "z-score": "11.5", + "p value": "6.43e-31", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 5.0779, 5.2778, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.5196, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.8667, 8.0167, 8.1650,\n 8.0413, 7.9196, 7.8000, 7.9472, 7.8296, 7.7139, 7.6000, 7.7460,\n 7.8905, 8.0335, 7.9216, 8.0632, 7.9530, 8.0934, 8.2325, 8.3702,\n 8.2619, 8.3984, 8.5337, 8.4270, 8.3217, 8.4560, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.1343, 9.2609, 9.1590, 9.2847,\n 9.4094, 9.5331, 9.6559, 9.7778, 9.8987, 10.0188, 10.1379, 10.2562,\n 10.3737, 10.4903, 10.3908, 10.2923, 10.1948, 10.3110, 10.2146, 10.1193,\n 10.0249, 10.1405, 10.2554, 10.3695, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.7635, 10.8729, 10.7828, 10.6936,\n 10.6052, 10.7141, 10.8224, 10.9301, 10.8426, 10.9497, 11.0562, 11.1621,\n 11.0756, 11.1810, 11.2857, 11.3899, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Although they ran at about the same speed, Sue beat Sally because she had such a bad start.\nWith pronoun replaced: Sue had such a bad start.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "55", + "Fraction of T in Greenlist": "27.6%", + "z-score": "0.859", + "p value": "0.195", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.4140, 0.6167, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.7947, 0.7237, 0.9152, 1.1043, 1.0328,\n 0.9623, 0.8926, 1.0773, 1.0079, 0.9393, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 1.1721, 1.1111, 1.2719, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.4059, 1.3460, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.0370, 0.9812, 0.9258, 1.0759, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.7399, 0.6881, 0.6366, 0.5855, 0.5348, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.4174, 0.5547, 0.5069, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.6742, 0.8066, 0.7595, 0.7127,\n 0.8438, 0.7971, 0.9272, 0.8805, 1.0096, 0.9629, 0.9165, 0.8704,\n 0.8245, 0.7789, 0.9062, 1.0328, 0.9870, 0.9415, 1.0670, 1.0215,\n 1.1461, 1.1007, 1.2244, 1.1790, 1.1339, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.8248, 0.9461, 0.9027, 0.8595])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.2939, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Madonna fired her trainer because she couldn't stand her boyfriend.\nWith pronoun replaced: She couldn't stand Madonna's boyfriend.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.5941, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.4027, 0.6000, 0.5298, 0.4606, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.3825, 0.3169, 0.2520, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.2408, 0.4191, 0.3573, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.2182,\n 0.3802, 0.3244, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n 0.1525, 0.1013, 0.0504, 0.2010, 0.1502, 0.0998, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.2100, -0.2513, -0.2924, -0.1667,\n -0.2078, -0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 2.8947, 2.7406, 2.5924, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.1779, 4.3894, 4.2563, 4.1265, 4.0000,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 4.8857, 4.7703, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.0844, 4.9747, 4.8669, 4.7610, 4.6568, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.2350, 5.4000, 5.5630, 5.7242, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.7382, 5.8936, 5.7955, 5.6986, 5.6032, 5.5090, 5.4160,\n 5.5691, 5.7207, 5.8707, 6.0193, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.1820, 6.3248, 6.4663, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.2541, 6.1685, 6.0838, 6.2222, 6.3595, 6.4957, 6.6308, 6.5465,\n 6.6804, 6.8133, 6.9451, 6.8615, 6.7788, 6.9094, 7.0391, 6.9570,\n 7.0857, 7.0043, 6.9237, 6.8439, 6.7648, 6.6865, 6.8138, 6.9402,\n 7.0658, 7.1904, 7.1125, 7.2363, 7.3592, 7.4813, 7.4039, 7.5251,\n 7.6456, 7.7653, 7.8842, 8.0024, 8.1198, 8.0427, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.2381, 8.3526, 8.4664, 8.5796, 8.6921, 8.6166,\n 8.5417, 8.6535, 8.7647, 8.6903, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.0895, 9.1970, 9.3040, 9.2311, 9.3374, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.6850, 9.6130, 9.7167, 9.8198, 9.7483,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.5410, 10.4713, 10.5692, 10.5000,\n 10.4312, 10.3628, 10.2949, 10.2273, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I can't cut that tree down with that axe; it is too small.\nWith pronoun replaced: The axe is too small.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.2182, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, -0.0599, -0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.2265,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.4082,\n 0.3558, 0.3038, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.3961,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.5203, 0.4714, 0.4229, 0.3746,\n 0.3267, 0.2791, 0.2319, 0.3698, 0.3225, 0.2756, 0.4121, 0.5477,\n 0.5005, 0.4536, 0.5879, 0.5410, 0.4944, 0.4481, 0.4021, 0.5345,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.5053, 0.4620, 0.4189, 0.5431, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "162", + "Fraction of T in Greenlist": "81.4%", + "z-score": "18.4", + "p value": "1.02e-75", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.6569,\n 5.8890, 6.1143, 6.3333, 6.5465, 6.3255, 6.5354, 6.7402, 6.5320,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 8.1882, 8.3557, 8.5206, 8.3423, 8.1689, 8.3333,\n 8.4953, 8.6549, 8.8121, 8.6469, 8.8029, 8.9567, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.6838, 9.8254, 9.9653, 10.1036,\n 10.2404, 10.3758, 10.5096, 10.6421, 10.7732, 10.6270, 10.4834, 10.6145,\n 10.7442, 10.8727, 11.0000, 10.8612, 10.9878, 11.1132, 10.9777, 11.1026,\n 11.2263, 11.3489, 11.4704, 11.5909, 11.7104, 11.8289, 11.9464, 12.0630,\n 12.1786, 12.2933, 12.4072, 12.5201, 12.6322, 12.5053, 12.3801, 12.4922,\n 12.6035, 12.7140, 12.8237, 12.7017, 12.8110, 12.9196, 12.7998, 12.9080,\n 13.0154, 13.1221, 13.2280, 13.3333, 13.4379, 13.5419, 13.6451, 13.7477,\n 13.8497, 13.9510, 14.0518, 14.1519, 14.2514, 14.1377, 14.0253, 14.1248,\n 14.2238, 14.3222, 14.4200, 14.3099, 14.4075, 14.5045, 14.3961, 14.4928,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 15.0624, 15.1556, 15.2483,\n 15.3405, 15.4323, 15.5236, 15.6144, 15.7048, 15.6010, 15.4980, 15.5885,\n 15.6785, 15.7680, 15.8571, 15.7560, 15.8450, 15.9335, 15.8336, 15.9220,\n 16.0099, 16.0974, 16.1845, 16.2712, 16.3575, 16.4435, 16.5291, 16.6143,\n 16.6991, 16.7835, 16.8676, 16.9514, 17.0348, 16.9386, 16.8431, 16.9265,\n 17.0096, 17.0924, 17.1748, 17.0807, 17.1630, 17.2449, 17.1519, 17.2337,\n 17.3151, 17.3962, 17.4770, 17.5575, 17.6377, 17.7176, 17.7971, 17.8764,\n 17.9554, 18.0340, 18.1124, 18.1905, 18.2683, 18.1783, 18.0888, 18.1667,\n 18.2442, 18.3215, 18.3985, 18.3103, 18.3871, 18.4637, 18.3763])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Madonna fired her trainer because she slept with her boyfriend.\nWith pronoun replaced: She slept with Madonna's boyfriend.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142, 1.2702, 1.1323, 1.0000,\n 0.8729, 1.1793, 1.0541, 0.9333, 1.2247, 1.5076, 1.3862, 1.6590, 1.5396,\n 1.4237, 1.3112, 1.2019, 1.0954, 0.9918, 1.2472, 1.1446, 1.0445, 0.9467,\n 0.8513, 0.7579, 0.6667, 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 1.0999,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 1.0265, 0.9456,\n 0.8660, 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.8165,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746, 0.7057,\n 0.8926, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924, 0.6732, 0.6086,\n 0.5447, 0.7223, 0.8980, 0.8337, 1.0070, 0.9428, 0.8793, 0.8165, 0.7543,\n 0.6928, 0.6319, 0.8003, 0.7395, 0.6794, 0.6198, 0.5608, 0.5023, 0.4444,\n 0.6083, 0.5505, 0.4932, 0.6547, 0.8147, 0.7570, 0.9152, 0.8577, 0.8006,\n 0.7441, 0.6880, 0.6325, 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143,\n 0.4611, 0.4082, 0.5592, 0.5064, 0.4540, 0.6030, 0.5507, 0.4988, 0.6460,\n 0.5941, 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.3311, 0.2828, 0.2349, 0.3746, 0.3267,\n 0.2791, 0.4174, 0.3698, 0.3225, 0.2756, 0.2289, 0.1826, 0.1365, 0.2722,\n 0.2261, 0.1803, 0.1348, 0.0896, 0.0447, 0.0000, 0.1332, 0.0886, 0.0442,\n 0.1761, 0.1317, 0.2626, 0.3928, 0.3482, 0.3038, 0.2596, 0.2158, 0.1721,\n 0.1287, 0.2568, 0.2134, 0.1703, 0.1273, 0.0847, 0.0422, 0.0000, 0.1260,\n 0.0838, 0.0418, 0.1667, 0.1247, 0.2487, 0.3721, 0.3299, 0.2879, 0.2462,\n 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "139", + "Fraction of T in Greenlist": "69.8%", + "z-score": "14.6", + "p value": "1.19e-48", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.3434, 6.5433, 6.7390, 6.5561, 6.7489, 6.9378,\n 7.1232, 6.9488, 6.7795, 6.9631, 7.1435, 6.9803, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.5340, 7.6996, 7.8628, 8.0238,\n 8.1825, 8.0358, 8.1929, 8.3480, 8.2054, 8.3589, 8.5105, 8.6603,\n 8.5218, 8.3859, 8.5347, 8.6817, 8.5491, 8.4188, 8.5649, 8.7093,\n 8.8522, 8.7250, 8.8667, 9.0068, 9.1455, 9.2828, 9.4188, 9.5534,\n 9.4301, 9.5637, 9.6960, 9.5751, 9.7065, 9.8367, 9.9656, 9.8473,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.2706, 10.3923, 10.5131, 10.6329, 10.7518, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.0368, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.6139, 11.7222, 11.8299, 11.7320, 11.8392,\n 11.9457, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.0902, 11.9977, 12.1012, 12.2040, 12.3063, 12.2150, 12.3168,\n 12.4181, 12.5188, 12.6190, 12.7187, 12.8179, 12.7279, 12.8267, 12.9249,\n 12.8359, 12.9337, 13.0311, 13.1279, 13.0400, 12.9527, 13.0493, 13.1453,\n 13.0590, 12.9732, 13.0690, 13.1644, 13.2593, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.6514, 13.7434, 13.8350, 13.7521,\n 13.8434, 13.9343, 14.0248, 13.9427, 13.8613, 13.9515, 14.0414, 13.9606,\n 13.8804, 13.9700, 14.0593, 13.9797, 13.9007, 13.9897, 14.0784, 14.1667,\n 14.0884, 14.1764, 14.2640, 14.3513, 14.4382, 14.5248, 14.6110])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I saw Jim yelling at some guy in a military uniform with a huge red beard. I don't know why he was, but he looked very unhappy.\nWith pronoun replaced: I don't know why the guy in uniform was, but he looked very unhappy.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, 0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.8819, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.4076, 1.5823, 1.5131, 1.4446, 1.3770, 1.5483, 1.7178, 1.6499,\n 1.5828, 1.5164, 1.4506, 1.3856, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.3333, 1.2719, 1.2111, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.3482,\n 0.3038, 0.2596, 0.2158, 0.1721, 0.1287, 0.0856, 0.0427, 0.0000,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "122", + "Fraction of T in Greenlist": "61.3%", + "z-score": "11.8", + "p value": "1.4e-32", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 3.2998, 3.7097, 3.2660,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.0424, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.2517, 2.5820, 2.8977, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 3.2660,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.0509, 3.2863,\n 3.1558, 3.0290, 3.2577, 3.4816, 3.3566, 3.2348, 3.4528, 3.6667,\n 3.8765, 3.7559, 3.6380, 3.8431, 3.7273, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.4873, 3.3824, 3.2796, 3.1787, 3.3729, 3.5642, 3.7528,\n 3.6522, 3.8376, 3.7383, 3.9208, 4.1008, 4.2784, 4.4537, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.0332, 4.9346, 5.0990, 5.2615, 5.1640,\n 5.0679, 4.9731, 5.1332, 5.2915, 5.4482, 5.3541, 5.5090, 5.4160,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.7456, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.3333, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.8808, 7.7976, 7.7152, 7.6335, 7.7567,\n 7.8791, 7.7981, 7.9196, 7.8393, 7.9600, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.2760, 8.3927, 8.3143, 8.2365, 8.3525, 8.4678,\n 8.5824, 8.5052, 8.6190, 8.7323, 8.8448, 8.9567, 9.0679, 9.1785,\n 9.2885, 9.3979, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 9.8433, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 9.9642, 10.0668, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.5725, 10.6722, 10.7714, 10.8702, 10.9685, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.1164, 11.2129, 11.3091, 11.4047, 11.5000,\n 11.4286, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.8280])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Alice tried frantically to stop her daughter from chatting at the party, leaving us to wonder why she was behaving so strangely.\nWith pronoun replaced: Alice's daughter was behaving so strangely.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.0000, 1.3093, 1.1793, 1.0541, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.6590, 1.9245, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.4968, 1.3926, 1.2910, 1.1918, 1.4317, 1.3333,\n 1.2372, 1.1431, 1.0510, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.1088, 1.0265, 0.9456, 0.8660,\n 1.0742, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.8926, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.6222, -0.6713, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.3362, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.4019, -1.2623, -1.3035, -1.1651, -1.2063, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.0565, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.3377, -1.3771, -1.4162, -1.2839, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.2179, -1.0890, -0.9608, -0.8333,\n -0.8727, -0.7461, -0.7856, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "137", + "Fraction of T in Greenlist": "68.8%", + "z-score": "14.3", + "p value": "1.38e-46", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.8411, 3.6148, 3.9056, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 4.7469,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 7.2488, 7.4061, 7.5615, 7.7150, 7.5907, 7.7426, 7.8928,\n 8.0413, 7.9196, 8.0667, 8.2121, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.4281,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.6141, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 10.1199, 10.2390, 10.3571, 10.2562,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.9626,\n 11.0746, 10.9769, 11.0883, 11.1991, 11.1026, 11.2127, 11.1172, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.3791, 11.4857,\n 11.5917, 11.6971, 11.6059, 11.7108, 11.8151, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.4746, 12.3883, 12.4870, 12.4015, 12.4998, 12.5976,\n 12.6949, 12.7918, 12.7073, 12.8037, 12.8997, 12.9952, 12.9116, 13.0067,\n 13.1014, 13.0185, 13.1129, 13.0307, 13.1246, 13.2182, 13.3113, 13.4040,\n 13.3227, 13.4150, 13.5069, 13.5985, 13.5179, 13.6091, 13.6999, 13.6201,\n 13.7106, 13.6313, 13.7215, 13.8113, 13.9007, 13.9897, 13.9113, 14.0000,\n 14.0884, 14.1764, 14.0986, 14.1863, 14.2737, 14.1966, 14.2836])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jane knocked on Susan's door but she did not answer.\nWith pronoun replaced: Jane did not answer.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.3820, -1.4434,\n -1.2174, -0.9949, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.0498, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.3093,\n -1.3578, -1.1896, -1.0229, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.4289,\n -1.4742, -1.3166, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.5396,\n -1.5822, -1.4335, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.6827, -1.7219, -1.7609, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.5752, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 0.8165,\n 1.1055, 0.9901, 0.8783, 1.1547, 1.4237, 1.6859, 1.5717, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.6681, 2.8943, 2.7791, 2.6667,\n 2.8868, 2.7761, 2.9913, 2.8823, 3.0929, 3.2998, 3.1918, 3.3947,\n 3.5942, 3.7905, 3.9837, 4.1740, 4.3614, 4.5461, 4.7281, 4.9075,\n 4.7980, 4.6904, 4.8669, 4.7610, 4.9348, 4.8305, 5.0019, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.9346, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.7181, 4.8797, 4.7875, 4.6967, 4.6070, 4.5186, 4.6775,\n 4.5899, 4.7469, 4.9023, 4.8154, 4.9691, 4.8830, 5.0350, 4.9497,\n 5.1000, 5.2489, 5.3964, 5.5426, 5.6874, 5.8310, 5.9732, 5.8878,\n 5.8034, 5.9442, 5.8605, 6.0000, 5.9171, 6.0553, 6.1924, 6.3283,\n 6.2459, 6.1644, 6.0837, 6.2183, 6.3517, 6.4842, 6.4040, 6.3246,\n 6.2459, 6.1680, 6.2990, 6.2217, 6.3517, 6.2750, 6.4039, 6.3278,\n 6.4558, 6.5828, 6.7090, 6.8343, 6.7585, 6.6833, 6.8076, 6.9310,\n 6.8564, 6.9789, 7.1007, 7.0265, 7.1474, 7.2675, 7.3869, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 8.0546,\n 8.1683, 8.2813, 8.3937, 8.5054, 8.6165, 8.7270, 8.6537, 8.5810,\n 8.6908, 8.8000, 8.9086, 9.0167, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.5808, 9.6850, 9.7886, 9.8918, 9.8198, 9.7483,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.5833, 10.6817, 10.7795, 10.8770, 10.9740, 11.0705, 11.0000,\n 11.0961, 11.1919, 11.2872, 11.3820, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The politicians far away in Washington could not know the settlers so they must make rules to regulate them.\nWith pronoun replaced: The politicians must make rules to regulate them.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.2344,\n -1.3000, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.0646, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.7223, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.5407, -0.3769, -0.2144, -0.2669, -0.1063, -0.1588, -0.2108,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.2949, -0.3428, -0.3904, -0.4376, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n -0.0467, -0.0930, 0.0464, 0.0000, -0.0461, -0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.3091, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.0429, 0.0856, 0.0427, 0.0000,\n -0.0424, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.1240, -0.1650, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330, 2.1170,\n 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.0415, 4.3409, 4.6268,\n 4.9008, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855, 5.4271, 5.2085, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 5.3072, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.2204, 5.0576, 4.8999, 4.7469, 4.9592, 4.8107, 4.6664, 4.5260, 4.3894,\n 4.5968, 4.8003, 4.6667, 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854,\n 5.0602, 5.2463, 5.4295, 5.3067, 5.4870, 5.3666, 5.2485, 5.1326, 5.3100,\n 5.1962, 5.0844, 4.9747, 5.1490, 5.0410, 4.9348, 4.8305, 4.7278, 4.8990,\n 5.0680, 4.9666, 4.8667, 4.7683, 4.6715, 4.8375, 5.0017, 5.1640, 5.0679,\n 5.2281, 5.3867, 5.2915, 5.1977, 5.1051, 5.0138, 4.9237, 4.8347, 4.9904,\n 4.9023, 4.8154, 4.7296, 4.6448, 4.7980, 4.9497, 4.8655, 5.0156, 4.9322,\n 5.0807, 5.2278, 5.3736, 5.2906, 5.2085, 5.3526, 5.4956, 5.4140, 5.5556,\n 5.4747, 5.3947, 5.3156, 5.4554, 5.3769, 5.2992, 5.2223, 5.3606, 5.2842,\n 5.2086, 5.1338, 5.2705, 5.1962, 5.3316, 5.2578, 5.1848, 5.3189, 5.2463,\n 5.1744, 5.3072, 5.4391, 5.5701, 5.4983, 5.6282, 5.7572, 5.6858, 5.6150,\n 5.5448, 5.4752, 5.4062, 5.3378, 5.4650, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.3220, 5.2560, 5.1905, 5.3153, 5.2501, 5.3740, 5.3092, 5.2449, 5.3677,\n 5.3038, 5.2402, 5.3621, 5.4832, 5.6036, 5.5402, 5.6598, 5.7787, 5.7155,\n 5.6527, 5.5904, 5.5284, 5.4670, 5.4059, 5.5234, 5.4626, 5.4023, 5.3423,\n 5.4588, 5.3991, 5.3398, 5.2809, 5.3964, 5.5113, 5.4526, 5.3941, 5.5082,\n 5.4501, 5.3923, 5.5056, 5.4480, 5.3909, 5.3340, 5.2775, 5.3898, 5.3335,\n 5.2776, 5.3891, 5.3333, 5.2779, 5.2229, 5.1681, 5.2786, 5.3886, 5.4981,\n 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: A number of times Henry had been present at interviews which his father had had with noted detectives who desired his aid in solving perplexing mysteries, and those occasions stood out as red-letter days for him.\nWith pronoun replaced: Those occasions stood out as red-letter days for Henry.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "33", + "Fraction of T in Greenlist": "16.6%", + "z-score": "-2.74", + "p value": "0.997", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.2174, -1.2792, -1.3402, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.5333, -1.5894, -1.6449, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.7000, -1.7496, -1.5667, -1.6166, -1.6660, -1.7150, -1.7635, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -2.1082,\n -2.1519, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.1576, -2.1980, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.2966, -2.3351, -2.3735,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.5315, -2.5683, -2.4283, -2.4653, -2.5020, -2.5386, -2.5750, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.6178, -2.6534, -2.6888, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.6949, -2.5618, -2.5969, -2.6319, -2.5000,\n -2.5351, -2.5700, -2.6047, -2.6393, -2.6737, -2.7080, -2.7421])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.8868, 3.2206, 3.5382,\n 3.8411, 4.1312, 4.4096, 4.1851, 3.9727, 4.2426, 4.0415, 3.8497, 4.1111,\n 3.9279, 4.1812, 4.0056, 4.2515, 4.4907, 4.7237, 4.9507, 4.7819, 5.0037,\n 4.8407, 4.6829, 4.8999, 5.1121, 4.9592, 4.8107, 5.0186, 5.2223, 5.4222,\n 5.6183, 5.8108, 6.0000, 6.1859, 6.3687, 6.2251, 6.0849, 5.9479, 6.1283,\n 5.9944, 6.1721, 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828,\n 6.3509, 6.5166, 6.3960, 6.2776, 6.4413, 6.3249, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.4667, 6.3578, 6.5137, 6.4065, 6.5607, 6.7132, 6.6075,\n 6.5033, 6.6541, 6.8034, 6.7006, 6.5993, 6.7469, 6.8931, 6.7931, 6.6944,\n 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 7.0711, 6.9759, 6.8819, 6.7890,\n 6.6973, 6.6066, 6.7456, 6.8834, 6.7937, 6.7049, 6.8414, 6.9768, 6.8889,\n 6.8019, 6.9361, 7.0692, 6.9830, 6.8977, 7.0296, 6.9451, 6.8615, 6.9923,\n 7.1220, 7.2508, 7.1678, 7.0857, 7.0043, 6.9237, 6.8439, 6.9714, 7.0980,\n 7.0187, 6.9402, 7.0658, 7.1904, 7.1125, 7.0353, 7.1590, 7.2818, 7.2051,\n 7.1291, 7.2510, 7.1755, 7.1007, 7.2217, 7.3419, 7.4613, 7.3869, 7.3131,\n 7.2399, 7.1673, 7.0952, 7.2136, 7.3312, 7.2596, 7.1886, 7.3054, 7.4215,\n 7.3508, 7.2807, 7.3960, 7.5106, 7.4409, 7.3717, 7.4855, 7.4168, 7.3485,\n 7.4616, 7.5740, 7.6859, 7.6179, 7.5504, 7.4833, 7.4167, 7.3506, 7.4615,\n 7.5719, 7.5061, 7.4407, 7.5503, 7.6594, 7.5944, 7.5297, 7.6381, 7.7460,\n 7.6816, 7.6177, 7.7249, 7.6613, 7.5981, 7.7047, 7.8107, 7.9162, 7.8533,\n 7.7907, 7.7285, 7.6667, 7.6052, 7.7099, 7.8142, 7.7530, 7.6922, 7.7958,\n 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam pulled up a chair to the piano, but it was broken, so he had to stand instead.\nWith pronoun replaced: The chair was broken, so he had to stand instead.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.9542,\n -2.9988, -3.0429, -2.8446, -2.8893, -2.9336, -2.7393, -2.7840, -2.5927,\n -2.4035, -2.4495, -2.4951, -2.5403, -2.5852, -2.6296, -2.6737, -2.7175,\n -2.7608, -2.8039, -2.8465, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.2660,\n -3.3041, -3.3420, -3.3797, -3.4171, -3.4543, -3.4913, -3.5280, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.6919, -3.7270, -3.5714, -3.6068, -3.6420, -3.6770, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.8676, -3.9010, -3.7534, -3.6067, -3.6407, -3.6745, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.6979, -3.5555, -3.5890, -3.4478, -3.4816,\n -3.5151, -3.3754, -3.2365, -3.2705, -3.3044, -3.3381, -3.3716, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.3686, -3.4017, -3.4346, -3.3003, -3.3333,\n -3.2000, -3.2332, -3.2662, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "65", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "41.5%", + "z-score": "3.08", + "p value": "0.00104", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 2.3113, 2.1909,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.2418, 2.1412, 2.3570, 2.5690, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823, 2.8868,\n 3.0793])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Since it was raining, I carried the newspaper over my backpack to keep it dry.\nWith pronoun replaced: I carried the newspaper over my backpack to keep the backpack dry.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.8321, 1.0954,\n 1.3525, 1.2472, 1.4968, 1.7408, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.4697, 1.3744, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.3198, 1.2366, 1.1547,\n 1.0742, 0.9949, 0.9169, 0.8402, 1.0426, 0.9661, 0.8907, 1.0887,\n 1.2839, 1.2081, 1.4000, 1.5894, 1.5133, 1.6997, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.7638, 1.6908, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.5823, 1.5131, 1.6854, 1.8559, 2.0247, 1.9548, 2.1213,\n 2.2862, 2.4495, 2.3791, 2.3094, 2.2405, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 2.1567, 2.0918, 2.0276, 1.9640,\n 1.9009, 1.8385, 1.9920, 1.9298, 1.8682, 1.8071, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.7179, 1.6591, 1.8074, 1.9545, 2.1005, 2.0412,\n 2.1858, 2.1268, 2.2699, 2.2111, 2.1527, 2.0948, 2.2361, 2.3764,\n 2.3183, 2.2608, 2.3995, 2.3422, 2.4797, 2.6163, 2.7520, 2.6943,\n 2.6370, 2.5802, 2.5238, 2.4678, 2.4122, 2.5456, 2.4902, 2.4351,\n 2.3805, 2.5123, 2.4578, 2.4037, 2.3500, 2.2966, 2.2436, 2.3735,\n 2.3206, 2.2680, 2.2159, 2.1640, 2.1125, 2.0613, 2.1892, 2.1381,\n 2.0873, 2.2140, 2.3400, 2.4653, 2.5898, 2.5386, 2.4877, 2.4371,\n 2.3868, 2.5099, 2.4597, 2.4099, 2.3603, 2.4822, 2.4327, 2.3835,\n 2.3346, 2.2860, 2.2377, 2.3580, 2.3098, 2.4294, 2.5483, 2.6667,\n 2.6182, 2.7358, 2.8528, 2.9692, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.8488, 4.6663, 4.4907,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.2204, 5.4322, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.0751, 6.2668, 6.4550, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.3687, 6.5485, 6.4051, 6.5823, 6.7568, 6.9286, 7.0980,\n 7.2648, 7.4294, 7.5916, 7.4536, 7.6140, 7.4790, 7.6376, 7.7942,\n 7.9489, 8.1016, 8.2525, 8.1216, 7.9931, 8.1428, 8.2908, 8.4371,\n 8.5819, 8.4566, 8.6000, 8.7419, 8.8823, 9.0213, 9.1589, 9.0370,\n 9.1735, 9.3086, 9.1890, 9.3231, 9.2055, 9.3386, 9.2229, 9.3550,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.0242, 10.1479, 10.0385, 10.1614, 10.2833, 10.4042, 10.5243, 10.6434,\n 10.5363, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 10.7955, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.2623, 11.3740, 11.4849, 11.3842,\n 11.4945, 11.3950, 11.5048, 11.4065, 11.5157, 11.4184, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.9551, 12.0605, 12.1652, 12.0699, 12.1741, 12.0798,\n 12.1836, 12.2868, 12.3895, 12.4915, 12.5930, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.8095, 12.7187, 12.8179, 12.9165, 13.0146, 13.1122,\n 13.2093, 13.1198, 13.2166, 13.3128, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.2410, 13.3361, 13.2499, 13.3447, 13.4390, 13.5329, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.8193, 13.9111, 13.8270, 13.9185, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.1149, 14.2046, 14.2939, 14.3828, 14.4714,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.7468, 14.8333,\n 14.9195, 14.8396, 14.9255, 14.8462, 14.9318, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The politicians far away in Washington could not know the settlers so they must make rules to regulate them.\nWith pronoun replaced: They must make rules to regular the politicians.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.2649, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.2041,\n -0.2542, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.6667,\n -0.7065, -0.5803, -0.4548, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "158", + "Fraction of T in Greenlist": "79.4%", + "z-score": "17.7", + "p value": "1.43e-70", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 1.2702, 1.1323, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.8783, 1.1547, 1.0441, 1.3112, 1.5717, 1.8257,\n 2.0738, 2.3163, 2.2011, 2.4371, 2.3238, 2.2133, 2.4422, 2.6667,\n 2.8868, 3.1027, 3.3147, 3.2026, 3.4101, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.3970, 4.5850, 4.7703, 4.9528, 5.1326, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.9954, 5.8812, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.3401, 7.4878,\n 7.6339, 7.7784, 7.9216, 8.0632, 8.2035, 8.3425, 8.4801, 8.6164,\n 8.7515, 8.6418, 8.7758, 8.9086, 9.0401, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 10.9109,\n 11.0254, 11.1392, 11.2522, 11.3644, 11.4759, 11.3740, 11.4849, 11.5950,\n 11.7045, 11.8132, 11.9213, 12.0286, 12.1353, 12.2414, 12.3468, 12.4516,\n 12.5557, 12.6592, 12.7622, 12.8645, 12.9662, 12.8679, 12.9692, 13.0699,\n 13.1701, 13.2698, 13.3689, 13.2722, 13.3710, 13.4691, 13.5668, 13.6640,\n 13.7606, 13.8567, 13.9524, 14.0475, 14.1422, 14.2364, 14.3302, 14.4234,\n 14.5162, 14.6086, 14.7005, 14.6071, 14.6987, 14.7899, 14.8807, 14.9711,\n 15.0610, 14.9691, 15.0588, 15.1481, 15.2369, 15.3254, 15.4135, 15.5012,\n 15.5885, 15.6754, 15.7619, 15.8481, 15.9339, 16.0194, 16.1045, 16.1892,\n 16.2736, 16.1846, 16.2688, 16.3526, 16.4361, 16.5193, 16.6021, 16.5144,\n 16.5970, 16.6793, 16.7614, 16.8430, 16.9244, 17.0055, 17.0862, 17.1667,\n 17.2468, 17.3267, 17.4062, 17.4855, 17.5644, 17.6431, 17.7215])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Anna did a lot better than her good friend Lucy on the test because she had studied so hard.\nWith pronoun replaced: Anna had studied so hard.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.3038, -0.1513, -0.2010, -0.0501, 0.0998, 0.2485, 0.1980,\n 0.3453, 0.4915, 0.6366, 0.7807, 0.7293, 0.8721, 1.0139, 0.9623,\n 0.9110, 0.8601, 1.0000, 1.1390, 1.0879, 1.0371, 0.9867, 0.9366,\n 0.8868, 1.0235, 1.1593, 1.1094, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.3284, 1.2804, 1.4087, 1.5363, 1.4881, 1.4402, 1.3926,\n 1.5189, 1.6444, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.5323,\n 1.6555, 1.6087, 1.7310, 1.8527, 1.8058, 1.7592, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.6951, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.4174, 5.6569,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 6.7390, 6.9307, 6.7489, 6.9378,\n 6.7625, 6.9488, 7.1317, 6.9631, 7.1435, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.3485, 7.5186, 7.6862, 7.5340, 7.6996, 7.5514, 7.7152,\n 7.8766, 8.0358, 8.1929, 8.0498, 7.9097, 8.0656, 7.9286, 8.0829,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.5491, 8.4188, 8.5649, 8.4371,\n 8.3116, 8.4566, 8.3333, 8.4770, 8.3560, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.3088, 9.4327, 9.3333, 9.4563, 9.5784, 9.6995, 9.8198,\n 9.9392, 9.8414, 9.7447, 9.6490, 9.7678, 9.6732, 9.7912, 9.6977,\n 9.6050, 9.5133, 9.4225, 9.3326, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.0991, 9.0134, 9.1302, 9.0453, 9.1615, 9.0773, 9.1927, 9.1094,\n 9.2240, 9.1414, 9.0595, 9.1735, 9.2867, 9.3993, 9.5112, 9.4301,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.6016, 9.7109, 9.8197, 9.7405,\n 9.8486, 9.9562, 10.0631, 10.1695, 10.0910, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.0701, 10.1749, 10.0987, 10.2029, 10.3065, 10.4097, 10.5123,\n 10.6144, 10.7159, 10.6404, 10.5654, 10.6665, 10.5921, 10.6927, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.6722, 10.5998, 10.5278, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.3439, 10.2743, 10.2050, 10.1363, 10.2350, 10.1667,\n 10.2650, 10.1970, 10.2949, 10.2273, 10.3248, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mark was close to Mr. Singer's heels. He heard him calling for the captain, promising him, in the jargon everyone talked that night, that not one thing should be damaged on the ship except only the ammunition, but the captain and all his crew had best stay in the cabin until the work was over.\nWith pronoun replaced: Mr. Singer heard him calling for the captain\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "53", + "Fraction of T in Greenlist": "26.6%", + "z-score": "0.532", + "p value": "0.297", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.1431, -0.8893, -0.9608, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, -0.0626, 0.1245, 0.3095, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.2624, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.4082,\n -0.2542, -0.3038, -0.3531, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, 0.0983, 0.0490, 0.0000, 0.1459, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.4536, 0.5879, 0.7213, 0.6742, 0.6274, 0.7595, 0.7127,\n 0.6662, 0.6199, 0.5740, 0.7044, 0.6584, 0.6128, 0.5674, 0.6963,\n 0.6509, 0.6058, 0.7336, 0.6885, 0.6437, 0.5991, 0.5548, 0.5108,\n 0.6367, 0.5927, 0.7177, 0.6737, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.5803, 0.7029, 0.6598, 0.6170, 0.5744, 0.5321])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "168", + "Fraction of T in Greenlist": "84.4%", + "z-score": "19.4", + "p value": "8.63e-84", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.9803, 7.1583, 7.3333,\n 7.5056, 7.6751, 7.5186, 7.6862, 7.8512, 8.0139, 8.1742, 8.3324,\n 8.4884, 8.6423, 8.7943, 8.6461, 8.7967, 8.9455, 9.0924, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.6612, 9.7989, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.4599, 11.5799, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 12.0386, 12.1533, 12.2671, 12.3801, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.9196, 13.0274, 13.1344,\n 13.0154, 13.1221, 13.2280, 13.3333, 13.4379, 13.5419, 13.6451, 13.7477,\n 13.8497, 13.7347, 13.8364, 13.9375, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.4200, 14.5173, 14.6141, 14.7103, 14.8059, 14.9011,\n 14.9957, 15.0898, 15.1834, 15.0756, 15.1690, 15.2619, 15.3543, 15.4463,\n 15.5378, 15.6288, 15.7194, 15.8096, 15.7048, 15.7948, 15.8843, 15.9734,\n 16.0620, 16.1503, 16.2381, 16.3255, 16.4125, 16.3106, 16.3975, 16.4839,\n 16.5700, 16.6557, 16.7410, 16.8259, 16.9105, 16.9947, 16.8953, 16.9794,\n 17.0631, 17.1464, 17.2294, 17.3121, 17.3944, 17.4763, 17.5579, 17.4611,\n 17.5426, 17.6237, 17.7046, 17.7851, 17.8653, 17.9452, 18.0248, 18.1041,\n 18.0095, 18.0886, 18.1675, 18.2461, 18.3243, 18.4023, 18.4800, 18.5574,\n 18.6345, 18.5420, 18.6190, 18.6958, 18.7722, 18.8484, 18.9243, 19.0000,\n 19.0754, 19.1505, 19.0600, 19.1350, 19.2098, 19.2843, 19.3586])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred is the only man still alive who remembers my great-grandfather. He was a remarkable man.\nWith pronoun replaced: My great-grandfather was a remarkable man.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "66", + "Fraction of T in Greenlist": "33.2%", + "z-score": "2.66", + "p value": "0.0039", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, 0.4804, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 1.0948, 1.0000,\n 1.2372, 1.4697, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.5430,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.2366, 1.1547,\n 1.3606, 1.2792, 1.4812, 1.6803, 1.8766, 1.7942, 1.7130, 1.6330,\n 1.5542, 1.7450, 1.9333, 1.8543, 1.7765, 1.6997, 1.6239, 1.5492,\n 1.4755, 1.4027, 1.5843, 1.5119, 1.6908, 1.8677, 2.0426, 1.9695,\n 1.8972, 1.8257, 1.7552, 1.9262, 2.0954, 2.0247, 1.9548, 1.8856,\n 1.8173, 1.7496, 1.6828, 1.6166, 1.7809, 1.7150, 1.8773, 2.0381,\n 2.1974, 2.1309, 2.0651, 2.0000, 1.9355, 2.0918, 2.2468, 2.1822,\n 2.1182, 2.0548, 1.9920, 1.9298, 1.8682, 1.8071, 1.9582, 1.8974,\n 2.0470, 2.1954, 2.3426, 2.2813, 2.2205, 2.1602, 2.1005, 2.2454,\n 2.3891, 2.3293, 2.2699, 2.2111, 2.1527, 2.0948, 2.0373, 1.9803,\n 2.1210, 2.0642, 2.2037, 2.3422, 2.4797, 2.4225, 2.3657, 2.3094,\n 2.2535, 2.3891, 2.5238, 2.4678, 2.4122, 2.3570, 2.3022, 2.2478,\n 2.1938, 2.1401, 2.2723, 2.2188, 2.3500, 2.4803, 2.6099, 2.5560,\n 2.5026, 2.4495, 2.3967, 2.5247, 2.6519, 2.5990, 2.5466, 2.4944,\n 2.4426, 2.3912, 2.3400, 2.2892, 2.4142, 2.3635, 2.4877, 2.6112,\n 2.7340, 2.6830, 2.6323, 2.5820, 2.5319, 2.6534, 2.7741, 2.7240,\n 2.6742, 2.6247, 2.5754, 2.5265, 2.4778, 2.4294, 2.5483, 2.5000,\n 2.6182, 2.7358, 2.6874, 2.6393, 2.5915, 2.5439, 2.6603])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "61.1%", + "z-score": "11.7", + "p value": "4.23e-32", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.6000, 5.8068, 6.0093, 6.2075,\n 6.0412, 5.8797, 6.0751, 6.2668, 6.1107, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.4051, 6.5823, 6.7568, 6.6172, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.6823, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.4801, 8.3702,\n 8.5067, 8.6418, 8.5337, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.1101, 9.2376, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.5331, 9.6559, 9.7778, 9.8987, 9.7986, 9.9187, 10.0380,\n 9.9392, 10.0577, 10.1754, 10.2923, 10.1948, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.3496, 10.2554, 10.1621, 10.0698, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 9.9373, 10.0504, 10.1627, 10.2743, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.2348, 10.3445, 10.4537, 10.3683, 10.2837, 10.1999,\n 10.1167, 10.0342, 9.9524, 9.8712, 9.7908, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.4858, 10.5884, 10.5123,\n 10.6144, 10.7159, 10.8170, 10.9176, 11.0177, 11.1173, 11.0418, 10.9669,\n 11.0661, 11.1648, 11.0904, 11.1886, 11.1148, 11.2126, 11.1392, 11.0663,\n 10.9939, 11.0913, 11.1883, 11.2848, 11.3809, 11.4766, 11.4047, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I couldn't put the pot on the shelf because it was too high.\nWith pronoun replaced: The shelf was too high.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -0.9245, -0.9676, -1.0105, -0.8700, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -1.1380, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -1.1007, -1.1399, -1.0106, -1.0499, -1.0890, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 6.2251, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.4370, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.5333, 7.6823, 7.5664, 7.4524, 7.6000, 7.7460,\n 7.8905, 8.0335, 7.9216, 7.8113, 7.9530, 7.8444, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.3217, 8.4560, 8.5891, 8.4853,\n 8.3828, 8.5149, 8.6459, 8.5448, 8.6747, 8.8036, 8.9314, 8.8318,\n 8.9586, 9.0845, 9.2094, 9.1111, 9.0139, 9.1380, 9.0419, 8.9469,\n 9.0702, 9.1925, 9.3140, 9.4346, 9.3408, 9.2480, 9.3678, 9.2760,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.9124, 9.8236, 9.7356, 9.8494, 9.9625, 9.8753, 9.9878, 10.0995,\n 10.2106, 10.1243, 10.2348, 10.3445, 10.4537, 10.3683, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.8204, 10.9259, 10.8423, 10.9473, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.2789, 11.3812, 11.2992, 11.4009, 11.5022,\n 11.6029, 11.5217, 11.6219, 11.7217, 11.8210, 11.7405, 11.8393, 11.9377,\n 12.0355, 11.9558, 12.0532, 12.1502, 12.2467, 12.1677, 12.2638, 12.3595,\n 12.4547, 12.3764, 12.4713, 12.5657, 12.6597, 12.5820, 12.6757, 12.7688,\n 12.8616, 12.7847, 12.8771, 12.9691, 13.0608, 12.9845, 13.0758, 13.1667,\n 13.2572, 13.1815, 13.2717, 13.3615, 13.4510, 13.3759, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: No one joins Facebook to be sad and lonely. But a new study from the University of Wisconsin psychologist George Lincoln argues that that's exactly how it makes us feel.\nWith pronoun replaced: That's exactly how the study makes us feel.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -0.8296, -0.6713, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.7921,\n -0.6412, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -0.8811, -0.7396, -0.5990, -0.6430, -0.6868, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -0.8292, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.0390, -0.9119, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "164", + "Fraction of T in Greenlist": "82.4%", + "z-score": "18.7", + "p value": "2.31e-78", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.8008, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.7625, 6.9488, 6.7795, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.8355, 7.6751, 7.8420, 8.0064, 8.1684, 8.3281, 8.4856, 8.6410,\n 8.4884, 8.6423, 8.7943, 8.9443, 9.0924, 9.2388, 9.3834, 9.2376,\n 9.3811, 9.5230, 9.6632, 9.8020, 9.9392, 10.0750, 9.9352, 10.0701,\n 10.2036, 10.3358, 10.4667, 10.5963, 10.7246, 10.5903, 10.7179, 10.8444,\n 10.9697, 11.0938, 11.2169, 11.3389, 11.2094, 11.3308, 11.4512, 11.5706,\n 11.6890, 11.8065, 11.9230, 11.7978, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.3595, 12.4708, 12.5812, 12.6909, 12.7998, 12.9080,\n 13.0154, 12.8978, 13.0048, 13.1111, 13.2167, 13.3217, 13.4259, 13.5295,\n 13.4152, 13.5185, 13.6211, 13.7230, 13.8244, 13.9251, 14.0253, 13.9140,\n 14.0139, 14.1131, 14.2118, 14.3099, 14.4075, 14.5045, 14.3961, 14.4928,\n 14.5890, 14.6847, 14.7799, 14.8746, 14.9687, 14.8629, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.3158, 15.4072, 15.4980, 15.5885,\n 15.6785, 15.7680, 15.8571, 15.7560, 15.8450, 15.9335, 16.0216, 16.1093,\n 16.1966, 16.2835, 16.1845, 16.2712, 16.3575, 16.4435, 16.5291, 16.6143,\n 16.6991, 16.6021, 16.6868, 16.7711, 16.8550, 16.9386, 17.0218, 17.1047,\n 17.0096, 17.0924, 17.1748, 17.2568, 17.3386, 17.4200, 17.5011, 17.4078,\n 17.4887, 17.5693, 17.6497, 17.7297, 17.8094, 17.8888, 17.7971, 17.8764,\n 17.9554, 18.0340, 18.1124, 18.1905, 18.2683, 18.1783, 18.2559, 18.3333,\n 18.4105, 18.4873, 18.5639, 18.6402, 18.5517, 18.6278, 18.7038])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bob paid for Charlie's college education. He is very generous.\nWith pronoun replaced: Bob is very generous.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165, 1.3472,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774, 0.9802, 0.8165,\n 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.9428, 1.2702, 1.5852, 1.4444,\n 1.3093, 1.1793, 1.0541, 0.9333, 1.2247, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.6963, 0.6025,\n 0.5108, 0.4211, 0.6667, 0.5774, 0.8165, 1.0510, 0.9608, 0.8724, 1.0999,\n 1.3234, 1.2344, 1.1471, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.2366,\n 1.1547, 1.0742, 0.9949, 1.1991, 1.1202, 1.0426, 0.9661, 1.1648, 1.0887,\n 1.0136, 0.9396, 1.1333, 1.0596, 0.9869, 1.1767, 1.3641, 1.5492, 1.4755,\n 1.4027, 1.3308, 1.2599, 1.1898, 1.3697, 1.5475, 1.4771, 1.4076, 1.3389,\n 1.2710, 1.2039, 1.3770, 1.3101, 1.4809, 1.4142, 1.3483, 1.2831, 1.4506,\n 1.3856, 1.3213, 1.2577, 1.4222, 1.3587, 1.2959, 1.2337, 1.3954, 1.3333,\n 1.2719, 1.4313, 1.5892, 1.7457, 1.6837, 1.6222, 1.5613, 1.5010, 1.4412,\n 1.5945, 1.7465, 1.6865, 1.6271, 1.5681, 1.5097, 1.4517, 1.6008, 1.5430,\n 1.4857, 1.4289, 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.4410,\n 1.3862, 1.3318, 1.2778, 1.4201, 1.3663, 1.3128, 1.4535, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.4662, 1.6028, 1.7384, 1.6859, 1.6337,\n 1.5818, 1.5303, 1.4792, 1.6127, 1.5617, 1.5110, 1.4606, 1.4105, 1.3608,\n 1.4923, 1.4427, 1.3933, 1.3443, 1.4743, 1.4254, 1.3768, 1.3284, 1.4570,\n 1.4087, 1.3607, 1.4881, 1.6148, 1.7408, 1.6925, 1.6444, 1.5967, 1.5492,\n 1.5020, 1.6262, 1.7498, 1.7025, 1.6555, 1.6087, 1.5621, 1.5159, 1.6378,\n 1.5916, 1.5457, 1.6667, 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.5592,\n 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 4.1851, 4.4543, 4.7140,\n 4.5033, 4.7556, 5.0000, 5.2372, 5.0389, 5.2705, 5.4958, 5.7155,\n 5.5277, 5.7429, 5.9530, 6.1584, 5.9797, 6.1815, 6.3791, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.5158, 6.3687, 6.5485, 6.7254, 6.5823, 6.4425, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.5591, 6.7269, 6.5991, 6.7648, 6.6395,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.7710, 7.9196, 7.8000, 7.9472, 8.0928, 8.2369, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.1750, 8.3152, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.2796, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.2147, 9.3422, 9.4685, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.1379, 10.2562,\n 10.1564, 10.2740, 10.3908, 10.5067, 10.6218, 10.7362, 10.8498, 10.7517,\n 10.8647, 10.9769, 11.0883, 10.9917, 11.1026, 11.0070, 11.1172, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.4574, 11.5645, 11.6709, 11.5779, 11.6837,\n 11.7890, 11.8937, 11.8018, 11.9060, 11.8151, 11.9187, 11.8287, 11.9319,\n 12.0345, 12.1366, 12.2381, 12.3391, 12.4395, 12.3508, 12.4508, 12.5503,\n 12.6492, 12.5615, 12.6601, 12.5732, 12.6713, 12.5852, 12.6830, 12.7802,\n 12.8769, 12.9732, 13.0690, 13.1644, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.2791, 13.3728, 13.2895, 13.3829, 13.3002, 13.3933, 13.4859, 13.5781,\n 13.6698, 13.7612, 13.8522, 13.7706, 13.8613, 13.9515, 14.0414, 13.9606,\n 14.0502, 14.1393, 14.2282, 14.1482, 14.2367, 14.1573, 14.2455, 14.1667,\n 14.2546, 14.3422, 14.4294, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: John promised Bill to leave, so an hour later he left.\nWith pronoun replaced: John left.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.15", + "p value": "0.875", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -0.7698, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "91", + "Fraction of T in Greenlist": "45.7%", + "z-score": "6.75", + "p value": "7.24e-12", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284, 2.6558, 2.4910, 2.3333,\n 2.6186, 2.4659, 2.3190, 2.5924, 2.8577, 2.7136, 2.9704, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.0509, 2.9212, 2.7952, 2.6726, 2.5533, 2.4371, 2.6681,\n 2.8943, 3.1160, 3.0000, 3.2167, 3.1027, 2.9913, 3.2026, 3.4101, 3.6141,\n 3.8146, 3.7033, 3.9001, 3.7905, 3.6831, 3.5777, 3.7700, 3.6662, 3.5642,\n 3.7528, 3.9386, 3.8376, 4.0205, 3.9208, 3.8228, 4.0024, 4.1797, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.6187, 3.7916, 3.9624, 4.1312, 4.0415,\n 4.2080, 4.1192, 4.0316, 4.1957, 4.3580, 4.5186, 4.6775, 4.5899, 4.7469,\n 4.6603, 4.5747, 4.4901, 4.6448, 4.5611, 4.4783, 4.6311, 4.7823, 4.7001,\n 4.8497, 4.7682, 4.6876, 4.8355, 4.9820, 4.9019, 4.8226, 4.7442, 4.6667,\n 4.5899, 4.5140, 4.6580, 4.8008, 4.9424, 4.8666, 5.0070, 4.9317, 4.8572,\n 4.9960, 5.1338, 5.2705, 5.4061, 5.3316, 5.4661, 5.3921, 5.3189, 5.2463,\n 5.3793, 5.3072, 5.2358, 5.3675, 5.4983, 5.4272, 5.5570, 5.4863, 5.4163,\n 5.5448, 5.6725, 5.6028, 5.5336, 5.4650, 5.3970, 5.3295, 5.2626, 5.3886,\n 5.5138, 5.6383, 5.5714, 5.6949, 5.6285, 5.5626, 5.6851, 5.8068, 5.9279,\n 6.0481, 5.9822, 6.1017, 6.0362, 5.9711, 5.9065, 6.0249, 5.9607, 5.8969,\n 6.0145, 6.1314, 6.0678, 6.1839, 6.1207, 6.0579, 6.1732, 6.2879, 6.2253,\n 6.1632, 6.1014, 6.0401, 5.9792, 5.9186, 6.0321, 6.1449, 6.2572, 6.1968,\n 6.3084, 6.2482, 6.1884, 6.2993, 6.4096, 6.5193, 6.6285, 6.5688, 6.6774,\n 6.6179, 6.5588, 6.5000, 6.6078, 6.5493, 6.4912, 6.5983, 6.7049, 6.6469,\n 6.7530])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Frank was upset with Tom because the toaster he had sold him didn't work.\nWith pronoun replaced: The toaster Frank had sold him didn't work.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "27", + "Fraction of T in Greenlist": "13.6%", + "z-score": "-3.72", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -1.4412, -1.5430, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -0.8729, -0.9649, -1.0541, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.3851, -2.4394, -2.4930, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.3333, -2.3842, -2.4344, -2.2226, -2.0137, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.1928, -3.2332, -3.2733,\n -3.3131, -3.3526, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.2408, -3.2796, -3.3182, -3.1500, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.4279, -3.4641,\n -3.5001, -3.5359, -3.5714, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.4780, -3.5131, -3.5480, -3.3989, -3.2509, -3.2863,\n -3.3216, -3.3567, -3.3916, -3.4263, -3.4609, -3.4953, -3.5295, -3.5635,\n -3.5973, -3.6310, -3.6645, -3.6979, -3.7311, -3.7641, -3.7970, -3.6556,\n -3.6887, -3.7216, -3.7543, -3.7869, -3.6477, -3.6805, -3.7131, -3.7455,\n -3.7778, -3.8100, -3.8420, -3.7055, -3.7376, -3.7697, -3.6345, -3.5000,\n -3.5325, -3.5648, -3.5970, -3.6291, -3.6610, -3.6927, -3.7244])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.8772, 3.7417, 3.6098, 3.8297, 4.0451, 3.9158, 4.1265, 4.3333,\n 4.5363, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.3716, 5.2463,\n 5.1236, 5.0034, 4.8857, 4.7703, 4.9528, 4.8394, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.7689, 5.6585, 5.8241, 5.7155,\n 5.8789, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.5970, 6.7416, 6.8849, 7.0268, 7.1674, 7.0711,\n 7.2104, 7.3485, 7.2532, 7.3901, 7.5258, 7.4316, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.6867, 7.8168, 7.9460, 7.8558,\n 7.7667, 7.8948, 8.0219, 7.9336, 8.0598, 8.1850, 8.0976, 8.0111,\n 8.1354, 8.2588, 8.3813, 8.2956, 8.4173, 8.3324, 8.4532, 8.5732,\n 8.4891, 8.4057, 8.5249, 8.6433, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.7482, 8.8636, 8.9783, 8.8978, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.0786, 9.0000, 9.1119, 9.2232, 9.1452, 9.2559, 9.3659,\n 9.2885, 9.2118, 9.3212, 9.4299, 9.5381, 9.4619, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.6322, 9.5577, 9.6635, 9.7688, 9.6948, 9.7996,\n 9.9038, 9.8303, 9.7574, 9.8611, 9.9642, 10.0668, 9.9944, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.0848, 10.1855, 10.2856, 10.2151,\n 10.3148, 10.4140, 10.3439, 10.2743, 10.3730, 10.4713, 10.5692, 10.5000,\n 10.5974, 10.5286, 10.6256, 10.7222, 10.6538, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Equally swoon-worthy is C.K. Dexter Haven, a pallid young dandy holding a jade-handled walking stick, with a poodle asleep at his feet.\nWith pronoun replaced: Equally swoon-worthy is C.K. Dexter Haven, a pallid young dandy holding a jade-handled walking stick, with a poodle asleep at Haven's feet.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.0721, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.2708, -1.3166, -1.3620, -1.2060, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.3663, -1.4100, -1.4535, -1.4967, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.6843, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.0000, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.8034, 2.0605, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 1.1926, 1.4045, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.8766, 2.0702, 1.9870, 2.1773,\n 2.3651, 2.5504, 2.7333, 2.9140, 3.0924, 3.2686, 3.1829, 3.0984,\n 3.2717, 3.4429, 3.6122, 3.7796, 3.9452, 4.1090, 4.2710, 4.1851,\n 4.1003, 4.0166, 4.1761, 4.0931, 4.0112, 4.1684, 4.0872, 4.2426,\n 4.3966, 4.5491, 4.4680, 4.3879, 4.3086, 4.2303, 4.1528, 4.0762,\n 4.0004, 4.1497, 4.2977, 4.4444, 4.5899, 4.7341, 4.8772, 5.0190,\n 5.1597, 5.2992, 5.4377, 5.3606, 5.4977, 5.6338, 5.7689, 5.6921,\n 5.8260, 5.9589, 6.0908, 6.0143, 6.1451, 6.2750, 6.4039, 6.3278,\n 6.2524, 6.3803, 6.3054, 6.2312, 6.3580, 6.4838, 6.6088, 6.7330,\n 6.8564, 6.9789, 6.9048, 7.0265, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.2399, 7.3584, 7.4762, 7.4034, 7.3312, 7.4482, 7.5644, 7.4927,\n 7.4215, 7.5369, 7.6517, 7.5809, 7.6950, 7.8084, 7.9211, 8.0333,\n 8.1448, 8.2557, 8.1851, 8.2954, 8.4050, 8.5141, 8.4439, 8.5524,\n 8.4826, 8.5905, 8.6978, 8.8045, 8.9107, 9.0164, 9.1215, 9.0520,\n 9.1566, 9.2607, 9.3642, 9.2952, 9.3982, 9.5007, 9.6028, 9.5341,\n 9.4658, 9.5673, 9.6684, 9.6005, 9.7011, 9.8012, 9.9008, 10.0000,\n 9.9325, 9.8654, 9.9641, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Joe has sold his house and bought a new one a few miles away. He will be moving out of it on Thursday.\nWith pronoun replaced: He will be moving out of The old house on Thursday.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.5941, -0.2928, 0.0000, -0.0949, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.3373, -1.3833, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.4792, -1.5206, -1.5617, -1.6025, -1.4606,\n -1.5016, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.7424, -1.7817,\n -1.8209, -1.8598, -1.7219, -1.7609, -1.6241, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 2.6605, 2.9938, 2.7815, 2.5820, 2.3938, 2.2156, 2.0466, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.4495,\n 2.7136, 2.9704, 2.8301, 2.6943, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.5850, 4.4721, 4.3614, 4.5461, 4.4371, 4.6188,\n 4.7980, 4.9747, 4.8669, 4.7610, 4.6568, 4.8305, 5.0019, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 5.1978, 5.3605, 5.5213, 5.4222,\n 5.3245, 5.4832, 5.3867, 5.2915, 5.4482, 5.6032, 5.7566, 5.9084,\n 5.8139, 5.7207, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.7890, 6.6973, 6.6066, 6.7456, 6.8834, 7.0201,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.8463, 7.7598, 7.6742, 7.8003,\n 7.9254, 8.0497, 8.1731, 8.0882, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.0824, 8.2032, 8.3231, 8.4423, 8.3605, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.7033, 8.8179, 8.9319, 8.8527,\n 8.7742, 8.8874, 9.0000, 9.1119, 9.0340, 9.1452, 9.0679, 8.9912,\n 9.1018, 9.2118, 9.3212, 9.4299, 9.3537, 9.2782, 9.3863, 9.4939,\n 9.6008, 9.5258, 9.4513, 9.5577, 9.6635, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 10.0371, 10.1398, 10.0668, 9.9944, 10.0965,\n 10.1981, 10.2993, 10.2273, 10.1558, 10.2565, 10.3566, 10.4563, 10.3853,\n 10.4846, 10.4140, 10.3439, 10.4427, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.6944, 10.7910, 10.8872, 10.8184, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mark heard Steve's feet going down the ladder. The door of the shop closed after him. He ran to look out the window.\nWith pronoun replaced: The door of the shop closed after Mark.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "19", + "Fraction of T in Greenlist": "22.9%", + "z-score": "-0.444", + "p value": "0.671", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.5013, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "125", + "Fraction of T in Greenlist": "62.8%", + "z-score": "12.3", + "p value": "3.57e-35", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.9620, 4.1779, 4.3894, 4.2563, 4.1265, 4.3333,\n 4.2064, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.7488, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.2485, 5.4259, 5.6009, 5.7735,\n 5.9438, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.2667, 7.4174, 7.5664, 7.4524, 7.6000, 7.4878,\n 7.3773, 7.2684, 7.4146, 7.5593, 7.4521, 7.5954, 7.4897, 7.6317,\n 7.7723, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 7.8782, 8.0139,\n 8.1483, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.1652,\n 9.0702, 8.9763, 9.0987, 9.2202, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.6210, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.6481, 10.5621, 10.6700, 10.5848,\n 10.5002, 10.4164, 10.5238, 10.6306, 10.5475, 10.6537, 10.5714, 10.4898,\n 10.5955, 10.5145, 10.6196, 10.7242, 10.6439, 10.7480, 10.6683, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.9220, 11.0235, 10.9458, 10.8686,\n 10.9697, 11.0702, 11.1702, 11.2698, 11.3688, 11.2924, 11.3910, 11.3150,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.2864, 11.3837, 11.4806, 11.5771,\n 11.6731, 11.5993, 11.6949, 11.7901, 11.7169, 11.8117, 11.9060, 12.0000,\n 11.9273, 11.8551, 11.9487, 12.0419, 12.1347, 12.2271, 12.3191])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: It is not easy to space buttonholes exactly the same distance apart, and it is very difficult to cut them precisely the right size. The tiniest slip of the scissors will make the hole too large, and even one thread uncut will leave it too small.\nWith pronoun replaced: Even one thread uncut will leave the right size too small.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.8553, -0.9152, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.6690, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.7041, -1.7488, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.5637, -1.4071, -1.2516, -1.2968, -1.3416, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.4100, -1.4535, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.4938, -1.5323,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.6125, -1.6496, -1.6865, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "88", + "Fraction of T in Greenlist": "44.2%", + "z-score": "6.26", + "p value": "1.9e-10", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.5281, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.9704, 3.2205, 3.4641, 3.7017, 3.5590, 3.7905, 4.0166,\n 4.2378, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.8275, 5.7133, 5.6011, 5.4909, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.5035, 5.4000, 5.2981, 5.4610, 5.3605, 5.2615, 5.1640,\n 5.3245, 5.4832, 5.3867, 5.2915, 5.1977, 5.1051, 5.2614, 5.1698,\n 5.3243, 5.2338, 5.1444, 5.0562, 4.9691, 4.8830, 5.0350, 4.9497,\n 5.1000, 5.0156, 5.1643, 5.0807, 4.9980, 5.1450, 5.0630, 4.9820,\n 5.1273, 5.2713, 5.4140, 5.5556, 5.4747, 5.3947, 5.3156, 5.2372,\n 5.3769, 5.2992, 5.4377, 5.3606, 5.2842, 5.4212, 5.5572, 5.4813,\n 5.4061, 5.3316, 5.2578, 5.3921, 5.3189, 5.4521, 5.3793, 5.3072,\n 5.2358, 5.1650, 5.0948, 5.2262, 5.3567, 5.4863, 5.4163, 5.5448,\n 5.4752, 5.6028, 5.5336, 5.4650, 5.3970, 5.3295, 5.2626, 5.1962,\n 5.1303, 5.2560, 5.1905, 5.1255, 5.2501, 5.3740, 5.3092, 5.2449,\n 5.3677, 5.3038, 5.2402, 5.1772, 5.1146, 5.0525, 5.1739, 5.1121,\n 5.0507, 4.9897, 4.9292, 4.8690, 4.9891, 5.1085, 5.0485, 5.1671,\n 5.2850, 5.2251, 5.3423, 5.4588, 5.3991, 5.5149, 5.6300, 5.7446,\n 5.8585, 5.7987, 5.9120, 6.0246, 5.9651, 5.9059, 5.8470, 5.9588,\n 5.9002, 5.8420, 5.9530, 6.0635, 6.0054, 5.9477, 6.0575, 6.0000,\n 5.9429, 6.0519, 5.9950, 5.9385, 6.0468, 6.1546, 6.2619])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Billy cried because Toby wouldn't accept his toy.\nWith pronoun replaced: Billy cried because Toby wouldn't accept Toby's toy.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "29", + "Fraction of T in Greenlist": "14.6%", + "z-score": "-3.4", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.4264, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.4623, -2.5019,\n -2.5412, -2.5802, -2.6190, -2.6576, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.4738, -2.5123, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.2023, -3.2365, -3.2705, -3.1327, -3.1669, -3.2009, -3.2348,\n -3.2685, -3.3020, -3.3354, -3.2002, -3.2337, -3.2671, -3.3003, -3.3333,\n -3.3662, -3.3990, -3.4316, -3.2991, -3.3319, -3.3645, -3.3970])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 1.3608, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.4207, 3.6515,\n 3.8772, 4.0980, 3.9620, 3.8297, 4.0451, 4.2563, 4.1265, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.8712, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.3825, 5.5500, 5.7155,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.6944, 6.5970, 6.5008, 6.6454, 6.5504, 6.4566, 6.5997,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 6.8834, 6.7937,\n 6.9303, 6.8414, 6.9768, 6.8889, 6.8019, 6.7159, 6.6308, 6.5465,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.9923, 7.1220, 7.2508, 7.1678,\n 7.2956, 7.2134, 7.3402, 7.4661, 7.3845, 7.5094, 7.4286, 7.5526,\n 7.4724, 7.5955, 7.5161, 7.6383, 7.7597, 7.8803, 7.8014, 7.9212,\n 7.8429, 7.7653, 7.8842, 8.0024, 8.1198, 8.0427, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.0476, 7.9729, 8.0880, 8.2024, 8.3162, 8.4293,\n 8.5417, 8.4674, 8.3937, 8.5054, 8.6165, 8.7270, 8.6537, 8.7636,\n 8.8728, 8.9815, 8.9086, 8.8364, 8.9444, 8.8726, 8.8013, 8.9087,\n 8.8379, 8.9447, 9.0510, 8.9806, 9.0863, 9.1915, 9.1215, 9.2261,\n 9.3302, 9.4338, 9.5369, 9.4673, 9.5698, 9.6719, 9.6028, 9.7043,\n 9.6356, 9.5673, 9.6684, 9.7690, 9.7011, 9.6336, 9.5666, 9.5000,\n 9.6000, 9.5338, 9.4680, 9.5675, 9.5021, 9.6011, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: As Ollie carried Tommy up the long winding steps, his legs dangled.\nWith pronoun replaced: Ollie's legs dangled.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "8.0%", + "z-score": "-5.53", + "p value": "1", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.2408, -3.2796, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.7626,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -4.1429, -4.1763, -4.2096, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.2200, -4.2527, -4.2852, -4.3176, -4.3498, -4.3818,\n -4.4137, -4.4454, -4.4769, -4.5083, -4.5396, -4.5707, -4.6017, -4.6325,\n -4.6632, -4.6938, -4.7242, -4.7544, -4.7846, -4.8146, -4.8444, -4.8742,\n -4.9038, -4.9333, -4.9626, -4.9918, -5.0210, -5.0499, -5.0788, -5.1075,\n -5.1362, -5.1647, -5.1931, -5.2213, -5.2495, -5.2776, -5.3055, -5.3333,\n -5.3611, -5.3887, -5.4162, -5.4436, -5.4709, -5.4981, -5.5252])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.5033, 4.7556, 5.0000, 4.8008, 4.6101, 4.8488, 5.0811, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.8068, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.6183, 5.4740, 5.6667,\n 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 5.9944, 6.1721,\n 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828, 6.3509,\n 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714,\n 6.8641, 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 7.7782,\n 7.6794, 7.5818, 7.7174, 7.6210, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.3625, 8.4868, 8.3976, 8.3093, 8.2219,\n 8.3453, 8.2588, 8.3813, 8.2956, 8.2107, 8.3324, 8.4532, 8.3691,\n 8.4891, 8.6083, 8.7267, 8.6433, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.8294, 8.9448, 9.0595, 8.9783, 8.8978, 8.8179, 8.9319, 8.8527,\n 8.9660, 9.0786, 9.1905, 9.3017, 9.4124, 9.3338, 9.4438, 9.5532,\n 9.6619, 9.5840, 9.5066, 9.6148, 9.7224, 9.6456, 9.7526, 9.8590,\n 9.9648, 9.8887, 9.8131, 9.7380, 9.8433, 9.7688, 9.8736, 9.7996,\n 9.7261, 9.8303, 9.9340, 9.8611, 9.9642, 10.0668, 10.1690, 10.0965,\n 10.0245, 10.1262, 10.2273, 10.1558, 10.2565, 10.3566, 10.4563, 10.3853,\n 10.3148, 10.4140, 10.5128, 10.4427, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.5974, 10.5286, 10.6256, 10.5573, 10.6538, 10.5859, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam pulled up a chair to the piano, but it was broken, so he had to sing instead.\nWith pronoun replaced: The chair was broken, so he had to sing instead.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "28", + "Fraction of T in Greenlist": "14.1%", + "z-score": "-3.56", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.6025, -2.6485, -2.6941, -2.5011, -2.5471, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -3.0019, -3.0424, -3.0827, -3.1226, -3.1623,\n -3.2017, -3.0317, -3.0714, -3.1109, -3.1500, -3.1889, -3.2276, -3.0619,\n -3.1008, -3.1395, -3.1779, -3.2161, -3.2541, -3.2918, -3.3293, -3.3665,\n -3.4035, -3.4403, -3.4769, -3.5132, -3.5494, -3.5853, -3.6210, -3.6566,\n -3.5001, -3.5359, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.5590,\n -3.5941, -3.6289, -3.6635, -3.6980, -3.7323, -3.7664, -3.8003, -3.8341,\n -3.6856, -3.7196, -3.7534, -3.6067, -3.6407, -3.4953, -3.3508, -3.3853,\n -3.4197, -3.4539, -3.4879, -3.5218, -3.5555, -3.5890, -3.6224, -3.6556,\n -3.5151, -3.3754, -3.4091, -3.4427, -3.4760, -3.5093, -3.5424, -3.5753,\n -3.6080, -3.6407, -3.5043, -3.5370, -3.4017, -3.4346, -3.4674, -3.5000,\n -3.5325, -3.5648, -3.4316, -3.4641, -3.4964, -3.5286, -3.5607])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.0466, 1.8856,\n 2.1939, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.5627, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.4422, 2.6667,\n 2.5568, 2.7761, 2.6679, 2.5621, 2.7757, 2.9856, 3.1918, 3.0861,\n 2.9824, 3.1840, 3.0817, 3.2796, 3.4743, 3.3729, 3.2733, 3.1754,\n 3.3657, 3.5533, 3.4562, 3.3607, 3.5447, 3.4503, 3.6315, 3.8103,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.8819, 4.0531, 4.2222, 4.3894,\n 4.5547, 4.4630, 4.6262, 4.5356, 4.6967, 4.8561, 5.0138, 4.9237,\n 4.8347, 4.9904, 4.9023, 5.0562, 5.2086, 5.1212, 5.0350, 4.9497,\n 5.1000, 5.2489, 5.1643, 5.0807, 5.2278, 5.1450, 5.0630, 5.2085,\n 5.1273, 5.0469, 4.9675, 4.8889, 4.8111, 4.9543, 5.0964, 5.0190,\n 5.1597, 5.0829, 5.2223, 5.3606, 5.2842, 5.2086, 5.3455, 5.2705,\n 5.4061, 5.5407, 5.6743, 5.5995, 5.5255, 5.6578, 5.5842, 5.7155,\n 5.8458, 5.7726, 5.7001, 5.8292, 5.9575, 5.8853, 5.8138, 5.9409,\n 6.0671, 6.1926, 6.3172, 6.4409, 6.3694, 6.4923, 6.4213, 6.5433,\n 6.4728, 6.5939, 6.7143, 6.6441, 6.7637, 6.8825, 6.8127, 6.7434,\n 6.8614, 6.9786, 6.9097, 7.0262, 7.1420, 7.2572, 7.3717, 7.3030,\n 7.4168, 7.5299, 7.4616, 7.5740, 7.6859, 7.6179, 7.7291, 7.6615,\n 7.7720, 7.8820, 7.9913, 8.1001, 8.2084, 8.3161, 8.2486, 8.3557,\n 8.2887, 8.3952, 8.3286, 8.4345, 8.5399, 8.4736, 8.5785, 8.6828,\n 8.6169, 8.5513, 8.6551, 8.7584, 8.6932, 8.7959, 8.8982, 9.0000,\n 9.1013, 9.0364, 9.1372, 9.2376, 9.1730, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The fish ate the worm. It was hungry.\nWith pronoun replaced: The worm was hungry.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.5556, 0.4364, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, -0.2255, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, 0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.6028, -0.6547,\n -0.4888, -0.5407, -0.5922, -0.6433, -0.6939, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.2423, -1.2857, -1.3288, -1.1825, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -0.9676, -1.0105, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.1513, -1.1921, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.0849, -1.1251, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -1.1399, -1.0106, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "104", + "Fraction of T in Greenlist": "52.3%", + "z-score": "8.88", + "p value": "3.31e-19", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.3083, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.6082, 4.4836, 4.6790, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.3614, 4.5461, 4.7281, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.2129, 5.1065, 5.0019, 4.8990,\n 5.0680, 4.9666, 4.8667, 5.0332, 5.1978, 5.3605, 5.5213, 5.6804,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.6986, 5.6032, 5.5090, 5.4160,\n 5.5691, 5.7207, 5.8707, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.4153, 6.5569, 6.6973, 6.6066, 6.5169, 6.4283, 6.3408,\n 6.4795, 6.3928, 6.3070, 6.4444, 6.5807, 6.7159, 6.8500, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.0759, 6.9923, 6.9094, 6.8274, 6.7462,\n 6.8757, 7.0043, 7.1319, 7.0513, 7.1779, 7.3037, 7.2236, 7.1443,\n 7.0658, 6.9879, 7.1125, 7.0353, 6.9587, 7.0823, 7.2051, 7.3271,\n 7.4483, 7.5687, 7.4924, 7.6120, 7.7308, 7.6551, 7.5800, 7.5056,\n 7.4317, 7.3584, 7.4762, 7.5933, 7.5204, 7.4482, 7.5644, 7.6800,\n 7.6082, 7.5369, 7.4662, 7.3960, 7.5106, 7.4409, 7.3717, 7.4855,\n 7.5988, 7.7114, 7.8233, 7.9347, 7.8657, 7.9764, 8.0865, 8.0178,\n 7.9497, 7.8820, 7.8147, 7.9241, 7.8572, 7.7908, 7.8995, 8.0076,\n 8.1151, 8.2221, 8.3286, 8.2624, 8.3683, 8.3024, 8.4078, 8.3423,\n 8.4471, 8.3820, 8.3173, 8.4215, 8.3572, 8.4608, 8.3969, 8.5000,\n 8.6026, 8.7048, 8.8065, 8.9077, 8.8439, 8.7805, 8.8812])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The journalists interviewed the stars of the new movie. They were very cooperative, so the interview lasted for a long time.\nWith pronoun replaced: The journalists were very cooperative, so the interview lasted for a long time.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "161", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.05", + "p value": "0.148", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.1873, 0.0925, 0.0000,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.6901, 0.8907, 0.8165,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 0.8520, 0.7868, 0.7223, 0.8980, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.9115, 1.0593, 1.2060, 1.1514, 1.0973, 1.2423, 1.1882,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.9245, 0.8755, 0.8268, 0.7784, 0.9129,\n 1.0465])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.4641, 3.3221, 3.5590, 3.4207, 3.2863,\n 3.1558, 3.0290, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.1490, 5.0410, 4.9348, 4.8305, 5.0019, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.4610, 5.6220, 5.7812, 5.6804,\n 5.8377, 5.9932, 5.8936, 6.0474, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.5504, 6.4566, 6.3640,\n 6.2725, 6.1820, 6.3248, 6.4663, 6.6066, 6.7456, 6.6559, 6.5672,\n 6.7049, 6.8414, 6.9768, 6.8889, 6.8019, 6.9361, 6.8500, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.4193, 7.3346, 7.2508, 7.3786,\n 7.2956, 7.4225, 7.3402, 7.2587, 7.1779, 7.0980, 7.2236, 7.3485,\n 7.4724, 7.5955, 7.5161, 7.4373, 7.5595, 7.6808, 7.8014, 7.7232,\n 7.6456, 7.7653, 7.6883, 7.8072, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.1229, 8.0476, 8.1628, 8.0880, 8.2024, 8.1282, 8.0546,\n 7.9816, 7.9091, 8.0227, 8.1356, 8.2479, 8.3595, 8.2874, 8.2158,\n 8.3268, 8.4371, 8.5469, 8.4757, 8.4050, 8.5141, 8.4439, 8.5524,\n 8.4826, 8.5905, 8.6978, 8.8045, 8.9107, 8.8413, 8.7724, 8.8780,\n 8.8094, 8.9145, 8.8464, 8.7788, 8.7116, 8.6448, 8.7492, 8.8531,\n 8.9565, 9.0593, 8.9929, 8.9268, 9.0292, 9.1310, 9.2324, 9.1667,\n 9.1013, 9.2022, 9.1372, 9.2376, 9.1730, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The trophy doesn't fit into the brown suitcase because it is too small.\nWith pronoun replaced: The trophy is too small.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.9901, 0.8783, 0.7698, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.0000, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.3758, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.2085, 0.4140, 0.6167, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 1.0932, 1.0370, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.0284, 1.1711, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.3112,\n 1.4470, 1.3957, 1.3448, 1.2943, 1.2441, 1.3779, 1.3278, 1.4606,\n 1.4105, 1.3608, 1.3114, 1.4427, 1.3933, 1.3443, 1.2956, 1.4254,\n 1.3768, 1.3284, 1.2804, 1.4087, 1.3607, 1.3131, 1.4402, 1.5667,\n 1.5189, 1.4713, 1.4241, 1.5492, 1.5020, 1.6262, 1.5791, 1.5323,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.6378, 1.5916, 1.5457, 1.5000,\n 1.6208, 1.5752, 1.6951, 1.6496, 1.6042, 1.5592, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "113", + "Fraction of T in Greenlist": "56.8%", + "z-score": "10.4", + "p value": "1.99e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.4816, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.8431, 4.0446, 4.2426, 4.1260, 4.3205,\n 4.5118, 4.7002, 4.5850, 4.4721, 4.3614, 4.2528, 4.1461, 4.3301,\n 4.2251, 4.1219, 4.0205, 4.2008, 4.3788, 4.2784, 4.4537, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.5034, 4.4083, 4.3146, 4.4820, 4.3894,\n 4.2981, 4.2080, 4.3727, 4.5356, 4.4462, 4.6070, 4.7662, 4.9237,\n 4.8347, 4.9904, 5.1444, 5.2970, 5.2086, 5.3594, 5.5088, 5.6569,\n 5.5690, 5.4822, 5.3964, 5.3116, 5.2278, 5.3736, 5.2906, 5.2085,\n 5.1273, 5.2713, 5.4140, 5.3333, 5.4747, 5.6149, 5.7540, 5.6737,\n 5.8114, 5.9481, 6.0837, 6.0038, 6.1382, 6.2716, 6.4040, 6.3246,\n 6.4558, 6.5861, 6.7155, 6.6365, 6.5583, 6.4807, 6.4039, 6.3278,\n 6.4558, 6.3803, 6.3054, 6.4322, 6.5582, 6.4838, 6.4101, 6.5350,\n 6.6591, 6.7823, 6.9048, 7.0265, 6.9529, 7.0737, 7.0007, 7.1207,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.5204, 7.4482, 7.5644, 7.6800,\n 7.6082, 7.7230, 7.8372, 7.7658, 7.8793, 7.9921, 7.9211, 8.0333,\n 8.1448, 8.0742, 8.1851, 8.2954, 8.4050, 8.5141, 8.6226, 8.7305,\n 8.8379, 8.7676, 8.8744, 8.9806, 8.9107, 9.0164, 9.1215, 9.0520,\n 9.1566, 9.2607, 9.1916, 9.2952, 9.3982, 9.3295, 9.4320, 9.5341,\n 9.6356, 9.7367, 9.8373, 9.9374, 10.0371, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.1970, 10.2949, 10.2273, 10.3248, 10.4217, 10.3546])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Thomson visited Cooper's grave in 1765. At that date he had been travelling for five years.\nWith pronoun replaced: Cooper had been travelling for five years.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.5328, -1.6036, -1.6729, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -1.7321,\n -1.5038, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.4190, -1.4765, -1.2667, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.0906, -1.1476, -1.2041, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.6988, -1.7457,\n -1.7923, -1.6222, -1.4536, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.1316, -0.9734, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.0513, -1.0973, -1.1429, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.1390, -1.1825, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.3035, -1.3443, -1.2063, -1.0690,\n -0.9326, -0.7971, -0.8389, -0.8805, -0.9218, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.9258, 0.7454, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.9439, 3.2222, 3.0551, 2.8947, 2.7406, 3.0072, 3.2660,\n 3.5176, 3.3665, 3.2205, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.5363, 4.4091, 4.2848, 4.1633, 4.3618, 4.5569, 4.7488, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 5.2485, 5.1326, 5.0190, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.7242, 5.6220, 5.5213, 5.6804,\n 5.8377, 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.0041, 6.1546,\n 6.0587, 5.9641, 6.1128, 6.0193, 5.9270, 5.8358, 5.9827, 6.1283,\n 6.2725, 6.1820, 6.0927, 6.2354, 6.3768, 6.5169, 6.6559, 6.5672,\n 6.4795, 6.6171, 6.7536, 6.8889, 7.0231, 7.1563, 7.2884, 7.4194,\n 7.3322, 7.4622, 7.5912, 7.7192, 7.6328, 7.7598, 7.8859, 7.8003,\n 7.7155, 7.8406, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.3231, 8.2413, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.6321, 8.5516, 8.4718, 8.5879, 8.7033, 8.8179, 8.7388, 8.8527,\n 8.7742, 8.8874, 8.8095, 8.9221, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.9502, 9.0601, 9.1694, 9.2782, 9.2032, 9.1287,\n 9.0548, 9.1629, 9.2704, 9.1970, 9.1242, 9.2311, 9.1587, 9.2651,\n 9.3708, 9.4761, 9.4042, 9.5089, 9.6130, 9.7167, 9.8198, 9.7483,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.8505, 10.7795, 10.7090, 10.8064, 10.9034, 11.0000,\n 11.0961, 11.1919, 11.1218, 11.0521, 11.1475, 11.0782, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Alice was dusting the living room and trying to find the button that Mama had hidden. No time today to look at old pictures in her favorite photo album. Today she had to hunt for a button, so she put the album on a chair without even opening it.\nWith pronoun replaced: She put the album on a chair without even opening the living room.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -1.1816, -0.9661, -0.7537, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.2649, -0.3290, -0.3922, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.2173, -1.2649,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.1783, -1.0206,\n -1.0675, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.3416, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.3779, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.4881, -1.3530, -1.2185,\n -1.2585, -1.1251, -1.1651, -1.0328, -1.0729, -1.1127, -0.9816, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 2.8402, 2.3570, 2.8368, 2.4495, 2.1170,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.9938,\n 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 3.7712, 4.0415, 3.8497, 3.6667,\n 3.9279, 4.1812, 4.0056, 3.8367, 4.0825, 4.3217, 4.1586, 4.0012, 4.2339,\n 4.4610, 4.6829, 4.8999, 4.7469, 4.9592, 4.8107, 5.0186, 4.8742, 4.7336,\n 4.9373, 5.1371, 5.0000, 4.8662, 5.0623, 5.2549, 5.1241, 4.9962, 5.1854,\n 5.3716, 5.5549, 5.7354, 5.6099, 5.7877, 5.6647, 5.8398, 5.7192, 5.6009,\n 5.7735, 5.9438, 5.8275, 5.7133, 5.8812, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.3089, 6.4667, 6.3578, 6.5137, 6.4065, 6.5607, 6.4550, 6.3509,\n 6.5033, 6.6541, 6.5514, 6.4501, 6.5993, 6.7469, 6.6469, 6.5483, 6.6944,\n 6.8391, 6.9824, 7.1243, 7.0268, 7.1674, 7.0711, 7.2104, 7.1152, 7.0211,\n 7.1591, 7.2960, 7.2029, 7.1110, 7.2466, 7.3810, 7.2900, 7.2001, 7.3333,\n 7.4655, 7.5967, 7.7268, 7.6376, 7.7667, 7.6785, 7.8065, 7.7192, 7.6328,\n 7.7598, 7.8859, 7.8003, 7.7155, 7.8406, 7.9649, 7.8808, 7.7976, 7.9209,\n 8.0434, 8.1650, 8.2858, 8.2032, 8.3231, 8.2413, 8.3605, 8.2793, 8.1989,\n 8.3172, 8.4348, 8.3550, 8.2760, 8.3927, 8.5088, 8.4303, 8.3525, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.8039, 8.7284,\n 8.8396, 8.9502, 8.8752, 8.8008, 8.9107, 9.0200, 8.9461, 8.8728, 8.9815,\n 9.0895, 9.1970, 9.3040, 9.2311, 9.3374, 9.2651, 9.3708, 9.2990, 9.2276,\n 9.3328, 9.4375, 9.3665, 9.2961, 9.4002, 9.5038, 9.4338, 9.3642, 9.4673,\n 9.5698, 9.6719, 9.7735, 9.7043, 9.8054, 9.7367, 9.8373, 9.7690, 9.7011,\n 9.8012, 9.9008, 9.8333, 9.7663, 9.8654, 9.9641, 9.8974, 9.8311, 9.9294,\n 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bill passed the half-empty plate to John because he was full.\nWith pronoun replaced: Bill was full.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "83", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "25.3%", + "z-score": "0.0634", + "p value": "0.475", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, 0.0000,\n -0.0902, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, 0.0727, 0.0000,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, 0.0000, -0.0658, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "147", + "Fraction of T in Greenlist": "73.9%", + "z-score": "15.9", + "p value": "2.28e-57", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 3.2998, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 5.8890, 6.1143, 5.8889, 6.1101, 5.8966, 6.1137, 6.3254, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.9282, 7.1187, 7.3054, 7.4885, 7.6681,\n 7.8445, 8.0178, 7.8360, 8.0076, 8.1763, 8.0018, 8.1689, 8.3333,\n 8.4953, 8.3283, 8.4887, 8.3267, 8.4857, 8.6424, 8.7970, 8.9496,\n 8.7943, 8.9456, 8.7943, 8.9443, 9.0924, 8.9455, 8.8015, 8.9489,\n 9.0947, 8.9544, 9.0990, 8.9618, 9.1051, 9.2469, 9.3871, 9.5258,\n 9.6630, 9.5304, 9.4000, 9.5366, 9.4087, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.3347, 10.4608, 10.3397,\n 10.4650, 10.3459, 10.4704, 10.5940, 10.4770, 10.5997, 10.7215, 10.8423,\n 10.9621, 11.0810, 11.1990, 11.3161, 11.4323, 11.5476, 11.4345, 11.5492,\n 11.4378, 11.5519, 11.6652, 11.7778, 11.8895, 12.0005, 11.8915, 11.7838,\n 11.8944, 11.7881, 11.8982, 12.0077, 12.1164, 12.2244, 12.3317, 12.4383,\n 12.5442, 12.6495, 12.5460, 12.6508, 12.5485, 12.6529, 12.5517, 12.6557,\n 12.7590, 12.6592, 12.7622, 12.8645, 12.9662, 13.0674, 13.1680, 13.2680,\n 13.3674, 13.4664, 13.5647, 13.4674, 13.5654, 13.4691, 13.5668, 13.6640,\n 13.7606, 13.8567, 13.9524, 13.8577, 13.7638, 13.8593, 13.7663, 13.8615,\n 13.9561, 14.0503, 14.1440, 14.2373, 14.3301, 14.4225, 14.5144, 14.4234,\n 14.5150, 14.4248, 14.5161, 14.4267, 14.5178, 14.6084, 14.5199, 14.6103,\n 14.7002, 14.7898, 14.8789, 14.9677, 15.0560, 15.1440, 15.2316, 15.3188,\n 15.4057, 15.3191, 15.4057, 15.4919, 15.4062, 15.4922, 15.5778, 15.6631,\n 15.5783, 15.6633, 15.5792, 15.6640, 15.7485, 15.8327, 15.9165, 15.8333,\n 15.9169, 15.8344, 15.9178, 16.0009, 15.9191, 15.8378, 15.9207])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bill passed the half-empty plate to John because he was full.\nWith pronoun replaced: John was full.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "180", + "# Tokens in Greenlist": "39", + "Fraction of T in Greenlist": "21.7%", + "z-score": "-1.03", + "p value": "0.849", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -0.9949, -1.0580, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -1.2039, -1.2572, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -0.9766, -1.0290, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.0911,\n -1.1406, -0.9733, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.0675, -1.1140, -1.1602, -1.0050, -1.0513, -0.8978, -0.9441, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -1.0512, -0.9048, -0.7593, -0.8041, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -0.8489, -0.8909,\n -0.9326, -0.9742, -0.8389, -0.8805, -0.7462, -0.7878, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "141", + "Fraction of T in Greenlist": "70.9%", + "z-score": "14.9", + "p value": "9.26e-51", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.7219, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.5991, 6.7648, 6.9282,\n 6.8031, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 8.1196, 8.2624,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.4540, 8.5915, 8.7277, 8.8626,\n 8.7515, 8.8853, 9.0179, 9.1493, 9.0401, 9.1706, 9.2999, 9.4281,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 9.8601, 9.9817, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.3737, 10.4903, 10.6061, 10.7211, 10.6218, 10.7362, 10.8498, 10.9626,\n 10.8647, 10.9769, 11.0883, 11.1991, 11.1026, 11.2127, 11.3222, 11.4310,\n 11.3357, 11.4440, 11.5515, 11.6584, 11.5645, 11.6709, 11.7766, 11.8818,\n 11.7890, 11.8937, 11.9977, 12.1012, 12.0096, 12.1125, 12.2150, 12.3168,\n 12.2263, 12.3277, 12.4286, 12.5289, 12.4395, 12.5394, 12.6387, 12.7376,\n 12.6492, 12.7476, 12.8456, 12.9430, 12.8556, 12.9527, 13.0493, 13.1453,\n 13.0590, 13.1547, 13.2499, 13.3447, 13.2593, 13.3537, 13.4477, 13.5412,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.6514, 13.7434, 13.8350, 13.9262,\n 13.8434, 13.9343, 14.0248, 14.1149, 14.0329, 14.1227, 14.2121, 14.3011,\n 14.2200, 14.3087, 14.3970, 14.4850, 14.4046, 14.4923, 14.5797, 14.6667,\n 14.5871, 14.6738, 14.7601, 14.8462, 14.7673, 14.8530, 14.9385])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The foxes are getting in at night and attacking the chickens. I shall have to kill them.\nWith pronoun replaced: I shall have to kill The foxes.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "29.1%", + "z-score": "1.35", + "p value": "0.0884", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.4042, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.2085, 0.1380, 0.0685, 0.0000,\n 0.2027, 0.4027, 0.6000, 0.5298, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.7851, 0.9488, 0.8889, 1.0507, 0.9909, 1.1508, 1.0911,\n 1.0319, 0.9733, 1.1306, 1.0721, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.9409, 0.8850, 0.8296, 0.9812, 1.1316, 1.0759, 1.0206,\n 0.9658, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 0.8447, 0.7921,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.3128, 1.2597, 1.4001, 1.3472,\n 1.2946, 1.2423, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239,\n 1.0735, 1.2096, 1.1593, 1.1094, 1.2441, 1.3779, 1.3278, 1.2780,\n 1.2285, 1.3608, 1.3114, 1.2623, 1.2136, 1.1651, 1.1169, 1.0690,\n 1.1991, 1.3284, 1.4570, 1.4087, 1.5363, 1.4881, 1.6148, 1.5667,\n 1.5189, 1.4713, 1.5967, 1.5492, 1.5020, 1.4551, 1.4084, 1.3620,\n 1.3159, 1.4393, 1.3933, 1.3474, 1.4699, 1.5916, 1.5457, 1.5000,\n 1.4546, 1.5752, 1.5298, 1.4846, 1.4397, 1.3950, 1.3506])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "38.8%", + "z-score": "3.69", + "p value": "0.000112", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -0.9623, -1.0954, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, 0.0000, 0.3216, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.7579, 0.6667,\n 0.9073, 0.8165, 1.0510, 1.2810, 1.1896, 1.4142, 1.3234, 1.2344,\n 1.4530, 1.3646, 1.5785, 1.7889, 1.9959, 1.9064, 1.8185, 1.7321,\n 1.6471, 1.8477, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.6330,\n 1.8245, 1.7450, 1.9333, 1.8543, 2.0397, 2.2226, 2.4034, 2.3238,\n 2.2453, 2.4228, 2.5983, 2.7717, 2.9433, 2.8638, 2.7854, 2.7080,\n 2.8764, 2.7995, 2.7235, 2.6485, 2.5743, 2.5011, 2.6656, 2.8284,\n 2.7552, 2.9161, 2.8433, 3.0022, 3.1597, 3.0870, 3.2426, 3.1704,\n 3.0989, 3.2525, 3.4047, 3.5556, 3.4839, 3.4130, 3.3428, 3.2733,\n 3.4217, 3.5689, 3.4995, 3.4308, 3.3627, 3.2953, 3.4402, 3.3731,\n 3.3066, 3.2408, 3.1755, 3.1109, 3.2533, 3.3947, 3.3301, 3.4701,\n 3.4058, 3.5446, 3.6824, 3.8191, 3.7547, 3.6908])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Joe's uncle can still beat him at tennis, even though he is 30 years younger.\nWith pronoun replaced: Joe is 30 years younger.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.2702, 1.5852, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.9901, 1.2687, 1.5396, 1.4237, 1.3112, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 1.1918, 1.0948, 1.0000,\n 0.9073, 1.1431, 1.0510, 1.2810, 1.1896, 1.0999, 1.0120, 1.2344,\n 1.4530, 1.6678, 1.5785, 1.4907, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 1.1767, 1.3641, 1.2910,\n 1.2189, 1.1476, 1.3308, 1.2599, 1.1898, 1.3697, 1.2999, 1.2309,\n 1.1628, 1.0954, 1.0289, 1.2039, 1.1375, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.7851, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.5680, 0.5143, 0.4611, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.3404, 0.2907, 0.2414, 0.1925,\n 0.1438, 0.0956, 0.0476, 0.1898, 0.1419, 0.0943, 0.2349, 0.1873,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.0907, 0.0452, 0.0000, -0.0449, -0.0896, 0.0447, 0.0000,\n -0.0444, 0.0886, 0.0442, 0.0000, -0.0439, -0.0875, -0.1309, 0.0000,\n -0.0434, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.3369, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "120", + "Fraction of T in Greenlist": "60.3%", + "z-score": "11.5", + "p value": "6.55e-31", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 3.8411, 3.6148, 3.9056, 4.1851, 3.9727, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.6101, 4.4272, 4.2515, 4.0825,\n 3.9196, 3.7626, 3.6108, 3.8490, 4.0814, 4.3083, 4.1603, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 5.1962, 5.3889, 5.5783, 5.7646, 5.6307, 5.8140, 5.9944, 5.8635,\n 5.7354, 5.6099, 5.4870, 5.6647, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.5137, 6.6679, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 7.8782, 7.7782,\n 7.6794, 7.8150, 7.9495, 8.0829, 8.2151, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.6359, 8.5396, 8.6667, 8.7927, 8.6976, 8.6035, 8.5105,\n 8.6357, 8.5437, 8.6679, 8.7913, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.0060, 9.1252, 9.2435, 9.3611, 9.2729, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.2463, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.2240, 9.3380, 9.4513, 9.5638, 9.6757, 9.5931, 9.7044, 9.6225,\n 9.7331, 9.6519, 9.7619, 9.8712, 9.9800, 9.8995, 10.0076, 10.1151,\n 10.0353, 9.9562, 9.8776, 9.7997, 9.9067, 9.8293, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.6904,\n 10.7920, 10.8931, 10.9936, 10.9176, 11.0177, 11.1173, 11.0418, 11.1410,\n 11.0661, 10.9917, 11.0904, 11.1886, 11.1148, 11.0414, 11.1392, 11.0663,\n 10.9939, 10.9220, 11.0194, 10.9480, 11.0450, 11.1415, 11.0705, 11.0000,\n 11.0961, 11.0261, 11.1218, 11.2171, 11.3120, 11.4065, 11.5006])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Emma did not pass the ball to Janie although she was open.\nWith pronoun replaced: She saw that Janie was open.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, 0.0685, 0.0000,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, 0.1879, 0.3735, 0.5571, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, 0.0000, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.3136, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.2949, 0.2449, 0.1952, 0.3404, 0.4845, 0.4345, 0.5774,\n 0.5274, 0.6689, 0.6190, 0.5695, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.6513, 0.6029, 0.5547, 0.5069, 0.6430, 0.7784, 0.9129,\n 1.0465, 0.9979, 0.9497, 1.0820, 1.0338, 1.1651, 1.1169, 1.0690,\n 1.0215, 0.9742, 0.9272, 0.8805, 0.8340, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.4721, 0.4280, 0.3841, 0.5108,\n 0.4669, 0.4233, 0.3800, 0.3369, 0.4620, 0.5864, 0.5431, 0.5000,\n 0.6234, 0.5803, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "53.3%", + "z-score": "9.21", + "p value": "1.65e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.3709, 5.5432, 5.4312, 5.3211, 5.4909, 5.6585, 5.8241, 5.7155,\n 5.6086, 5.5035, 5.6667, 5.5630, 5.4610, 5.3605, 5.5213, 5.6804,\n 5.8377, 5.9932, 5.8936, 6.0474, 5.9491, 6.1012, 6.0041, 5.9084,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.1664, 6.3122, 6.2197, 6.1283,\n 6.0380, 5.9488, 6.0927, 6.2354, 6.1470, 6.2883, 6.4283, 6.3408,\n 6.4795, 6.6171, 6.7536, 6.8889, 6.8019, 6.7159, 6.8500, 6.7648,\n 6.6804, 6.8133, 6.7298, 6.8615, 6.9923, 7.1220, 7.0391, 6.9570,\n 7.0857, 7.0043, 7.1319, 7.0513, 7.1779, 7.0980, 7.2236, 7.1443,\n 7.2691, 7.3930, 7.5161, 7.6383, 7.5595, 7.6808, 7.8014, 7.9212,\n 8.0402, 7.9619, 7.8842, 8.0024, 8.1198, 8.0427, 8.1594, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.1628, 8.2772, 8.2024, 8.3162, 8.4293,\n 8.3550, 8.2813, 8.2082, 8.3205, 8.2479, 8.3595, 8.2874, 8.2158,\n 8.3268, 8.4371, 8.5469, 8.4757, 8.4050, 8.3349, 8.4439, 8.3742,\n 8.4826, 8.4133, 8.5212, 8.6284, 8.7351, 8.8413, 8.7724, 8.8780,\n 8.8094, 8.9145, 8.8464, 8.7788, 8.8832, 8.9872, 9.0906, 9.0233,\n 8.9565, 9.0593, 8.9929, 8.9268, 8.8612, 8.7959, 8.8982, 9.0000,\n 8.9351, 9.0364, 9.1372, 9.0726, 9.1730, 9.1088, 9.2086])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bill passed the gameboy to John because his turn was over.\nWith pronoun replaced: Bill's turn was over.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.0000,\n 0.2294, 0.4549, 0.3758, 0.2981, 0.5175, 0.4399, 0.6547, 0.5774,\n 0.5013, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.9396, 0.8667, 0.7947, 0.7237, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.6732, 0.6086, 0.5447, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.4103, 0.3499, 0.5222, 0.6928, 0.6319, 0.5717, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.4932, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.1063, 0.0529, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.1513, 0.0000, -0.0501, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.1952, 0.1459, 0.0969, 0.2414, 0.1925,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.0464, 0.0000, -0.0461, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, 0.0000, -0.0449, -0.0896, -0.1340, -0.1782,\n -0.2221, -0.2657, -0.1325, -0.1761, -0.2195, -0.0875, 0.0436, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.2513, -0.2924, -0.3333,\n -0.3740, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 2.1004, 2.4495, 2.7815, 2.5820, 2.3938, 2.7080, 2.5281, 2.8284,\n 3.1177, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.5301, 4.3818,\n 4.5985, 4.4544, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.9903, 7.1554, 7.0226, 7.1857, 7.0557, 6.9282,\n 6.8031, 6.6803, 6.8419, 7.0014, 7.1590, 7.3147, 7.1945, 7.3485,\n 7.5007, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 7.7460,\n 7.8905, 7.7784, 7.9216, 8.0632, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.1550, 8.2916, 8.4270, 8.5612, 8.6942, 8.8260, 8.9567,\n 9.0863, 9.2147, 9.3422, 9.4685, 9.3641, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.6559, 9.7778, 9.6775, 9.7986, 9.6995, 9.8198,\n 9.7219, 9.8414, 9.9601, 9.8634, 9.9813, 10.0984, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.5769, 10.4829, 10.3898, 10.2975, 10.4103,\n 10.5224, 10.6338, 10.5427, 10.4524, 10.5632, 10.6733, 10.7828, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.2316, 11.3373, 11.4425, 11.5470,\n 11.6510, 11.7543, 11.8571, 11.7696, 11.6827, 11.5966, 11.6990, 11.6137,\n 11.7157, 11.8172, 11.7326, 11.8336, 11.9341, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.1646, 12.0824, 12.1805, 12.0990, 12.1967, 12.2940,\n 12.3908, 12.3100, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.6283, 12.7226, 12.8165, 12.9099, 12.8313, 12.9244, 13.0171, 13.1094,\n 13.2012, 13.2927, 13.2149, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.6914, 13.7801, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Ann asked Mary what time the library closes, because she had forgotten.\nWith pronoun replaced: Ann had forgotten.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, 0.0925, 0.3651,\n 0.2705, 0.5345, 0.7924, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.5551, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.2801, 0.4865, 0.6901, 0.8907, 1.0887,\n 1.0136, 0.9396, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.2710, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.9238, 1.0915, 1.0290, 0.9671, 0.9058,\n 0.8452, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.8577, 1.0141, 1.1693, 1.1114, 1.0541,\n 0.9972, 0.9409, 0.8850, 0.8296, 0.7746, 0.7201, 0.6660, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.6030, 0.7509, 0.6983, 0.6460, 0.7921,\n 0.9372, 1.0812, 1.0284, 0.9759, 0.9238, 0.8721, 0.8208, 0.7698,\n 0.7192, 0.6689, 0.8095, 0.9492, 0.8987, 0.8485, 0.7987, 0.7493,\n 0.8868, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.5952, 0.7303,\n 0.6825, 0.6351, 0.7688, 0.9017, 1.0338, 0.9858, 0.9382, 0.8909,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.7419, 0.6963,\n 0.6509, 0.6058, 0.5610, 0.6885, 0.6437, 0.5991, 0.7255, 0.8513,\n 0.9763, 0.9313, 0.8866, 0.8422, 0.7979, 0.7539, 0.7102, 0.6667,\n 0.6234, 0.7461, 0.8682, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "195", + "Fraction of T in Greenlist": "98.0%", + "z-score": "23.8", + "p value": "2.77e-125", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188,\n 4.9010, 5.1711, 5.4306, 5.6804, 5.9214, 6.1546, 6.3805, 6.5997,\n 6.8127, 7.0201, 7.2222, 7.4194, 7.6120, 7.8003, 7.9845, 8.1650,\n 8.3418, 8.5153, 8.6855, 8.8527, 9.0170, 9.1785, 9.3375, 9.4939,\n 9.6479, 9.7996, 9.9491, 10.0965, 10.2419, 10.3853, 10.5269, 10.6667,\n 10.8047, 10.9411, 11.0758, 11.2090, 11.3406, 11.4708, 11.5996, 11.7271,\n 11.8531, 11.9779, 12.1015, 12.2238, 12.3450, 12.4650, 12.5839, 12.7017,\n 12.8185, 12.9342, 13.0489, 13.1626, 13.2754, 13.3873, 13.4982, 13.6083,\n 13.7175, 13.8258, 13.9333, 14.0400, 14.1460, 14.2511, 14.3555, 14.4591,\n 14.5621, 14.6643, 14.7658, 14.8666, 14.9668, 15.0663, 15.1651, 15.2633,\n 15.3609, 15.4579, 15.5543, 15.6502, 15.7454, 15.8401, 15.9342, 16.0278,\n 16.1208, 16.2133, 16.3053, 16.3967, 16.4877, 16.5782, 16.6682, 16.7577,\n 16.8467, 16.9353, 17.0234, 17.1111, 17.1983, 17.2851, 17.3715, 17.4574,\n 17.5430, 17.6281, 17.7128, 17.7971, 17.8810, 17.9645, 18.0476, 18.1304,\n 18.2128, 18.2948, 18.3764, 18.4577, 18.5387, 18.6193, 18.6995, 18.7794,\n 18.8590, 18.9382, 19.0171, 19.0957, 19.1740, 19.2519, 19.3296, 19.4069,\n 19.4839, 19.5606, 19.6371, 19.7132, 19.7890, 19.8646, 19.9398, 20.0148,\n 20.0895, 20.1639, 20.2381, 20.3120, 20.3856, 20.4590, 20.5321, 20.6049,\n 20.6775, 20.7498, 20.8219, 20.8937, 20.9653, 21.0366, 21.1077, 21.1786,\n 21.2492, 21.3196, 21.3898, 21.4597, 21.5294, 21.5989, 21.6682, 21.7372,\n 21.8061, 21.8747, 21.9431, 22.0113, 22.0792, 22.1470, 22.2146, 22.2819,\n 22.3491, 22.4161, 22.4828, 22.5494, 22.6157, 22.6819, 22.7479, 22.8137,\n 22.8793, 22.9447, 23.0099, 23.0750, 23.1398, 23.2045, 23.2690, 23.3333,\n 23.3975, 23.4615, 23.5253, 23.5889, 23.6523, 23.7156, 23.7787])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Pam's parents came home and found her having sex with her boyfriend, Paul. They were furious about it.\nWith pronoun replaced: Pam's parents were furious about it.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.3102, -2.3570,\n -2.1690, -2.2162, -2.2630, -2.0785, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.2673, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.4908, -2.5318, -2.5726, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.5019,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.8107, -2.8478, -2.8846, -2.9212,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.9215, -2.9575, -2.9933, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -3.0288, -3.0638, -3.0987, -3.1334,\n -3.1679, -3.0292, -2.8913, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -3.0674, -3.1009, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.1177, 2.9439, 3.2222, 3.4915, 3.7524, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 3.8490, 4.0814, 3.9337, 3.7905, 4.0166,\n 3.8772, 4.0980, 4.3142, 4.5260, 4.3894, 4.5968, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.6082, 4.8038, 4.6790, 4.5569, 4.7488, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.0684, 5.2485, 5.4259, 5.6009, 5.4848,\n 5.3709, 5.2590, 5.4312, 5.3211, 5.2129, 5.3825, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.8279, 5.9874, 6.1450, 6.0410, 5.9386,\n 5.8377, 5.9932, 5.8936, 6.0474, 5.9491, 5.8522, 6.0041, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.5008, 6.4059, 6.3122, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.3768, 6.5169, 6.4283, 6.5672,\n 6.7049, 6.8414, 6.7536, 6.6667, 6.5807, 6.7159, 6.6308, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.7788, 6.9094, 7.0391, 7.1678,\n 7.0857, 7.0043, 6.9237, 7.0513, 6.9714, 7.0980, 7.0187, 6.9402,\n 7.0658, 7.1904, 7.1125, 7.2363, 7.3592, 7.4813, 7.4039, 7.5251,\n 7.6456, 7.5687, 7.4924, 7.6120, 7.5364, 7.4613, 7.5800, 7.6980,\n 7.8153, 7.7407, 7.6667, 7.5933, 7.5204, 7.6368, 7.7524, 7.8673,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.0636, 8.1758, 8.2874, 8.3984,\n 8.3268, 8.2557, 8.1851, 8.2954, 8.4050, 8.5141, 8.6226, 8.5524,\n 8.6603, 8.7676, 8.8744, 8.9806, 9.0863, 9.0164, 9.1215, 9.2261,\n 9.3302, 9.2607, 9.1916, 9.1230, 9.2265, 9.3295, 9.4320, 9.5341,\n 9.4658, 9.5673, 9.6684, 9.7690, 9.8691, 9.9687, 9.9008, 10.0000,\n 10.0987, 10.1970, 10.1295, 10.0624, 9.9957, 10.0935, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the tide knocked it down.\nWith pronoun replaced: This afternoon the tide knocked The flag down.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.8321, 1.0954,\n 1.3525, 1.6036, 1.4968, 1.3926, 1.2910, 1.1918, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.0508, 0.1013, 0.2522, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, 0.0464, 0.0000, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, 0.0452, 0.0000, 0.1348, 0.0896, 0.2234, 0.1782,\n 0.1332, 0.0886, 0.2208, 0.3522, 0.4828, 0.6128, 0.5674, 0.5222,\n 0.6509, 0.6058, 0.5610, 0.5164, 0.6437, 0.5991, 0.5548, 0.6810,\n 0.8065, 0.9313, 1.0555, 1.0106, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.6598, 0.6170, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.1586, 4.0012, 3.8490, 4.0814, 3.9337, 4.1603, 4.3818,\n 4.5985, 4.8107, 5.0186, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 6.1283, 6.3058, 6.1721,\n 6.0413, 5.9132, 5.7877, 5.9628, 5.8398, 6.0125, 6.1828, 6.0622,\n 6.2302, 6.1118, 6.2776, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.2296,\n 7.3773, 7.5234, 7.6681, 7.8113, 7.9530, 8.0934, 7.9849, 8.1240,\n 8.2619, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.8260, 8.7210,\n 8.8518, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.4094, 9.3088, 9.4327, 9.3333, 9.4563, 9.5784, 9.6995, 9.6016,\n 9.5047, 9.4088, 9.5294, 9.6490, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.3695, 10.2763, 10.1840, 10.0926, 10.2062,\n 10.1157, 10.2287, 10.3409, 10.2514, 10.3630, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 11.0756, 11.1810, 11.0952, 11.0102, 11.1151, 11.2194, 11.3232, 11.4263,\n 11.5290, 11.6311, 11.5471, 11.6487, 11.7498, 11.8503, 11.9504, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.0824, 12.1805, 12.2782, 12.1967, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.5979, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.6597, 12.7532, 12.8464, 12.7688,\n 12.8616, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.2572, 13.1815, 13.1063, 13.1966, 13.1219, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The man couldn't lift his son because he was so weak.\nWith pronoun replaced: The man was so weak.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.9702, -2.0412,\n -2.1106, -2.1783, -2.2446, -1.9245, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.9215, -1.9826, -2.0428, -2.1019, -1.8516,\n -1.9118, -1.6678, -1.7288, -1.4907, -1.2567, -1.0265, -1.0911, -1.1547,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -0.8141, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.5071, -0.3365, -0.3907, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.7685, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.1825, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.3377, -1.3771, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "172", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "55.2%", + "z-score": "9.16", + "p value": "2.68e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.7457, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.3116, 2.1783, 2.4398, 2.3094, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.6726, 2.5533, 2.4371, 2.3238, 2.5538, 2.7791, 2.6667,\n 2.8868, 3.1027, 2.9913, 3.2026, 3.4101, 3.6141, 3.8146, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.8759, 4.0657, 3.9595, 4.1461, 4.3301,\n 4.5115, 4.6904, 4.5847, 4.4809, 4.3788, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 5.1978, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.3867, 5.5435, 5.4482, 5.3541, 5.2614, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.7785, 5.6875, 5.5976, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.5169, 6.6559, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.1111, 7.0231, 6.9361, 7.0692, 7.2012,\n 7.1149, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.4625, 7.5895,\n 7.7155, 7.6315, 7.7566, 7.8808, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.2858, 8.2032, 8.3231, 8.4423, 8.5607, 8.6783, 8.7952, 8.7133,\n 8.6321, 8.7482, 8.6677, 8.5879, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.8800, 8.9912,\n 9.1018, 9.0257, 8.9502, 8.8752, 8.8008, 8.7270, 8.8369, 8.7636,\n 8.8728, 8.8000, 8.9086, 9.0167, 8.9444, 8.8726, 8.9800, 8.9087,\n 9.0155, 8.9447, 9.0510, 9.1567])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The customer walked into the bank and stabbed one of the tellers. He was immediately taken to the hospital.\nWith pronoun replaced: The customer was immediately taken to the hospital.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -0.9309, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.5040, -0.5636, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.6325,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.2582, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, -0.1513, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.0455, 0.0907, 0.0452, 0.0000, -0.0449, 0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, 0.0000,\n -0.0434, -0.0865, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.3299, -0.3702, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "134", + "Fraction of T in Greenlist": "67.3%", + "z-score": "13.8", + "p value": "1.41e-43", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 1.0835, 0.9428,\n 1.2702, 1.5852, 1.8889, 1.7457, 1.6082, 1.8974, 2.1776, 2.0412,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.0123, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.1740, 4.3614, 4.5461, 4.7281, 4.6188,\n 4.7980, 4.9747, 5.1490, 5.0410, 5.2129, 5.3825, 5.5500, 5.4433,\n 5.6086, 5.7719, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.5514, 6.7006, 6.8483, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.2825, 8.4116, 8.5396, 8.4444, 8.5715, 8.6976, 8.8227, 8.7287,\n 8.8529, 8.9763, 9.0987, 9.0057, 9.1273, 9.2480, 9.3678, 9.2760,\n 9.3951, 9.5133, 9.6307, 9.5400, 9.6566, 9.7725, 9.8877, 9.7980,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.3853, 10.2975,\n 10.4079, 10.5175, 10.6265, 10.5397, 10.6481, 10.7559, 10.8631, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.0102, 11.1151, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.4638, 11.5655, 11.6666, 11.7672, 11.6847,\n 11.7849, 11.8846, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.4223, 12.5179, 12.6130, 12.5336,\n 12.6283, 12.7226, 12.8165, 12.7378, 12.8313, 12.9244, 13.0171, 12.9391,\n 13.0314, 13.1233, 13.2149, 13.1376, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.5131, 13.6025, 13.5265, 13.6155, 13.7042, 13.7925])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Since Chester was dependent on Uncle Vernon, he couldn't very well marry without his approval\nWith pronoun replaced: He couldn't very well marry without Chester's approval\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.4880, 0.3849, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.0842, 0.0000,\n 0.2474, 0.4899, 0.4042, 0.6405, 0.8724, 1.0999, 1.0120, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.1342, 0.0667, 0.0000, -0.0658, 0.1307, 0.3248, 0.2582,\n 0.4491, 0.6376, 0.8238, 0.7559, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.2962, 0.2357,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.1723, 0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, 0.0558, 0.2222, 0.1659, 0.3303, 0.4932, 0.6547,\n 0.5974, 0.5407, 0.4845, 0.4288, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.2074, 0.1549, 0.3086, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, 0.0000,\n 0.1480, 0.0983, 0.2449, 0.3904, 0.5348, 0.4845, 0.4345, 0.3849,\n 0.3356, 0.2867, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.0000,\n 0.1400, 0.0930, 0.2319, 0.1849, 0.1382, 0.0919, 0.0458, 0.0000,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.0449, 0.0896, 0.0447, 0.1782,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, 0.0856, 0.0427, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, -0.0420, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.0829, 0.0413, 0.0000, 0.1234, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "148", + "Fraction of T in Greenlist": "74.4%", + "z-score": "16.1", + "p value": "1.64e-58", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 5.9297, 6.1389, 6.3434, 6.5433, 6.7390, 6.9307, 7.1187, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 8.0018, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.6469, 8.8029, 8.6424, 8.7970, 8.9496,\n 9.1002, 9.2488, 9.3956, 9.5406, 9.3881, 9.5321, 9.6743, 9.8150,\n 9.9540, 10.0915, 10.2275, 10.0820, 10.2172, 10.3510, 10.4834, 10.6145,\n 10.7442, 10.8727, 10.7333, 10.8612, 10.9878, 11.1132, 11.2376, 11.3608,\n 11.4829, 11.3489, 11.4704, 11.5909, 11.7104, 11.8289, 11.9464, 12.0630,\n 11.9338, 12.0499, 12.1651, 12.2794, 12.3928, 12.5053, 12.6170, 12.4922,\n 12.6035, 12.7140, 12.8237, 12.9326, 13.0408, 13.1482, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.5419, 13.6451, 13.7477,\n 13.8497, 13.9510, 14.0518, 14.1519, 14.0379, 14.1377, 14.2370, 14.3357,\n 14.4338, 14.5313, 14.6283, 14.5173, 14.6141, 14.7103, 14.8059, 14.9011,\n 14.9957, 15.0898, 14.9817, 14.8746, 14.9687, 15.0624, 14.9568, 15.0502,\n 15.1432, 15.2357, 15.3277, 15.4192, 15.5103, 15.6010, 15.4980, 15.5885,\n 15.4867, 15.3858, 15.4762, 15.5662, 15.6558, 15.5563, 15.4578, 15.5473,\n 15.6365, 15.7252, 15.8135, 15.9014, 15.8046, 15.8923, 15.7965, 15.7014,\n 15.6070, 15.5134, 15.6014, 15.6891, 15.7763, 15.6839, 15.7709, 15.8575,\n 15.9437, 16.0296, 15.9385, 15.8481, 15.7584, 15.6692, 15.7553, 15.6670,\n 15.7529, 15.8384, 15.9235, 15.8362, 15.9211, 15.8345, 15.9193, 15.8334,\n 15.9179, 15.8327, 15.7481, 15.8325, 15.9165, 15.8327, 15.9165, 15.8333,\n 15.9169, 15.8344, 15.9178, 15.8359, 15.9191, 16.0019, 16.0844])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mama came over and sat down beside Alice. Gently she stroked her hair and let the child weep.\nWith pronoun replaced: Mama stroked her hair and let the child weep.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.8034, 1.6859, 1.5717, 1.4606,\n 1.3525, 1.2472, 1.1446, 1.3926, 1.2910, 1.1918, 1.0948, 1.3333,\n 1.2372, 1.1431, 1.0510, 1.2810, 1.5068, 1.4142, 1.3234, 1.2344,\n 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.6348, 0.8402, 0.7646, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.4606, 0.3922, 0.5846, 0.5164,\n 0.4491, 0.3825, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.1796, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.0000, 0.1659, 0.1101, 0.0548, 0.0000,\n 0.1629, 0.1081, 0.0538, 0.2144, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, 0.0000, -0.0516, -0.1029, -0.1537, 0.0000,\n -0.0508, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, -0.1952, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.2756, -0.3205, -0.3651,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.3951, -0.4377, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.5579, -0.5991, -0.6402, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.3369, -0.2100, -0.2513, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "131", + "Fraction of T in Greenlist": "65.8%", + "z-score": "13.3", + "p value": "1.14e-40", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 4.5826, 4.0825,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.3235, 4.6291, 4.9193, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.6614, 5.4444, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.4846, 6.6667,\n 6.8457, 7.0219, 6.8718, 7.0456, 7.2168, 7.3853, 7.2400, 7.0980,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.5206,\n 8.4037, 8.5435, 8.6820, 8.8192, 8.7045, 8.8405, 8.9753, 9.1088,\n 8.9963, 8.8853, 9.0179, 9.1493, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.3422, 9.2376, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.7986, 9.9187, 10.0380,\n 10.1564, 10.0577, 9.9601, 10.0779, 10.1948, 10.3110, 10.2146, 10.3301,\n 10.4448, 10.5587, 10.4636, 10.3695, 10.4829, 10.5955, 10.5025, 10.4103,\n 10.5224, 10.6338, 10.7444, 10.8544, 10.7635, 10.6733, 10.7828, 10.8916,\n 10.9998, 10.9107, 11.0183, 11.1253, 11.2316, 11.1435, 11.0562, 11.1621,\n 11.2674, 11.3721, 11.4762, 11.3899, 11.3043, 11.4080, 11.5111, 11.6137,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.9669, 12.0660, 11.9837, 11.9020, 12.0008, 12.0990, 12.1967, 12.1158,\n 12.2132, 12.3100, 12.4065, 12.3263, 12.2467, 12.3428, 12.4384, 12.5336,\n 12.6283, 12.5495, 12.4713, 12.5657, 12.6597, 12.7532, 12.6757, 12.7688,\n 12.8616, 12.9540, 12.8771, 12.8007, 12.8928, 12.9845, 13.0758, 13.1667,\n 13.0910, 13.0157, 13.1063, 13.1966, 13.2864, 13.2118, 13.3014])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Emma did not pass the ball to Janie although she saw that she was open.\nWith pronoun replaced: Emma saw that she was open.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "74", + "Fraction of T in Greenlist": "37.2%", + "z-score": "3.97", + "p value": "3.59e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321, 1.5403, 1.3608,\n 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.8856, 1.7321, 1.5852, 1.8889,\n 2.1822, 2.0370, 1.8974, 1.7628, 1.6330, 1.9096, 1.7823, 1.6590, 1.9245,\n 2.1831, 2.0605, 2.3113, 2.1909, 2.0738, 2.3163, 2.2011, 2.0889, 2.3238,\n 2.5538, 2.4422, 2.3333, 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.6713,\n 2.5690, 2.7775, 2.9824, 2.8808, 2.7811, 2.6833, 2.8830, 3.0796, 2.9823,\n 2.8868, 2.7928, 2.7005, 2.8919, 2.8006, 2.7107, 2.8983, 3.0833, 2.9938,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.0924, 3.0071, 3.1829, 3.3566, 3.2717,\n 3.1879, 3.1052, 3.0237, 3.1937, 3.1129, 3.0330, 3.2004, 3.3659, 3.2863,\n 3.2077, 3.1300, 3.0533, 3.2157, 3.1394, 3.0641, 3.2242, 3.3826, 3.3075,\n 3.4641, 3.3895, 3.3156, 3.4702, 3.3968, 3.3243, 3.4768, 3.6279, 3.5556,\n 3.4839, 3.4130, 3.3428, 3.4915, 3.4217, 3.3526, 3.4995, 3.6452, 3.5762,\n 3.5079, 3.4402, 3.3731, 3.5166, 3.4499, 3.3838, 3.5256, 3.6664, 3.6004,\n 3.5350, 3.4701, 3.4058, 3.5446, 3.4806, 3.4171, 3.3542, 3.4913, 3.4286,\n 3.3665, 3.3049, 3.2437, 3.3789, 3.3181, 3.2577, 3.3915, 3.5245, 3.4641,\n 3.5960, 3.5359, 3.4762, 3.6068, 3.5474, 3.4884, 3.6178, 3.7463, 3.6874,\n 3.6289, 3.5708, 3.5131, 3.6401, 3.5827, 3.5256, 3.6515, 3.7766, 3.7196,\n 3.6629, 3.6067, 3.5508, 3.6745, 3.6188, 3.5635, 3.6862, 3.8081, 3.7528,\n 3.6979, 3.6433, 3.5890, 3.7097, 3.6556, 3.6019, 3.7216, 3.8406, 3.7869,\n 3.9052, 3.8516, 3.7984, 3.9158, 3.8627, 3.8100, 3.9265, 4.0423, 3.9896,\n 3.9372, 3.8851, 3.8333, 3.9481, 3.8964, 3.8451, 3.9590, 4.0723, 4.0210,\n 3.9699])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.7614, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 1.0328, 1.3859, 1.7233, 2.0466, 1.8856,\n 2.1939, 2.0381, 1.8889, 2.1822, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.6590, 1.9245, 2.1831, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.0000,\n 1.8970, 2.1229, 2.3445, 2.5621, 2.7757, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.8808, 2.7811, 2.9814, 2.8830, 2.7863, 2.6914, 2.8868,\n 3.0793, 3.2691, 3.4562, 3.3607, 3.5447, 3.4503, 3.3574, 3.5382,\n 3.4463, 3.6242, 3.5333, 3.4438, 3.3556, 3.5301, 3.7025, 3.8730,\n 4.0415, 3.9530, 4.1192, 4.0316, 3.9452, 4.1090, 4.0234, 4.1851,\n 4.1003, 4.0166, 3.9340, 4.0931, 4.2507, 4.4066, 4.5611, 4.4783,\n 4.6311, 4.5491, 4.4680, 4.6188, 4.5384, 4.6876, 4.6079, 4.5291,\n 4.4511, 4.5983, 4.7442, 4.8889, 5.0323, 4.9543, 5.0964, 5.0190,\n 4.9424, 5.0829, 5.0070, 5.1461, 5.0707, 4.9960, 4.9221, 5.0596,\n 5.1962, 5.3316, 5.4661, 5.3921, 5.5255, 5.4521, 5.3793, 5.5114,\n 5.4391, 5.5701, 5.4983, 5.4272, 5.3567, 5.4863, 5.6150, 5.7429,\n 5.8698, 5.7994, 5.9254, 5.8554, 5.7860, 5.9109, 5.8419, 5.9660,\n 5.8974, 5.8294, 5.7619, 5.8848, 6.0069, 6.1283, 6.2489, 6.1815,\n 6.3013, 6.2342, 6.1677, 6.2866, 6.2205, 6.3385, 6.2728, 6.2075,\n 6.1427, 6.2598, 6.3762, 6.4920, 6.6072, 6.5424, 6.6568, 6.5924,\n 6.5285, 6.6421, 6.5785, 6.6914, 6.6282, 6.5653, 6.5029, 6.6150,\n 6.7264, 6.8373, 6.9477, 6.8853, 6.9950, 6.9330, 6.8713, 6.9803,\n 6.9190, 7.0273, 6.9663, 6.9056, 6.8454, 6.9530, 7.0601, 7.1667,\n 7.2728, 7.2125, 7.3180, 7.2581, 7.1985, 7.3034, 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mark became absorbed in Blaze, the white horse. He was afraid the stable boys at the Burlington Stables struck at him and bullied him because he was timid, so he took upon himself the feeding and care of the animal.\nWith pronoun replaced: Mark was afraid the stable boys at the Burlington Stables struck at him and bullied him.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.9596, -1.7450, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -2.1170, -1.9127, -1.7111, -1.7638, -1.8161, -1.8677, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.3552, -2.4000, -2.4444, -2.4885, -2.5322, -2.3564, -2.4004,\n -2.4441, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.4045, -2.4467, -2.4887, -2.5303, -2.5717, -2.4079, -2.4495,\n -2.4908, -2.5318, -2.5726, -2.6131, -2.6533, -2.6933, -2.7330, -2.7724,\n -2.8116, -2.6540, -2.6934, -2.7325, -2.5769, -2.4225, -2.2692, -2.3094,\n -2.3494, -2.1980, -2.2381, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.7091, -2.7454, -2.7815, -2.6414, -2.6776, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.7591, -2.7940, -2.8287, -2.6949, -2.7297, -2.7644, -2.6319, -2.5000,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.0861, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.2116, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.0612, -0.1217, -0.1816, -0.2408, -0.2993, -0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.2144, -0.0534, 0.1063, 0.0529, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.1469, 0.0000, 0.1459, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.2828, -0.3289, -0.1873,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.0896, 0.0447, 0.0000,\n -0.0444, -0.0886, -0.1325, -0.1761, -0.2195, -0.2626, -0.1309, 0.0000,\n 0.1302, 0.0865, 0.0432, 0.0000, -0.0429, -0.0856, -0.1280, -0.1703,\n -0.2122, -0.2540, -0.2955, -0.1684, -0.0420, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, -0.1650, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + } + ], + "metrics": { + "accuracy_without_watermark": 0.44, + "accuracy_with_watermark": 0.52, + "f1_without_watermark": 0.43977591036414565, + "f1_with_watermark": 0.5198079231692677 + } + }, + "validation": { + "results": [ + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The drain is clogged with hair. It has to be cleaned.\nWith pronoun replaced: The hair has to be cleaned.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.5635, -1.6223, -1.6803, -1.7376, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.8000, -1.5894, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.5483, -1.5993, -1.6499,\n -1.7000, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -2.0548, -1.8843, -1.9298, -1.9749, -2.0197, -2.0641, -1.8974,\n -1.9420, -1.9863, -2.0303, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -2.1057, -1.9518, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.2478,\n -2.2871, -2.3262, -2.3651, -2.4037, -2.4421, -2.2966, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.4572, -2.4944,\n -2.3538, -2.3912, -2.4283, -2.4653, -2.5020, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.5099, -2.5460, -2.5820, -2.4461, -2.4822, -2.5181, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.5351, -2.4042, -2.2740, -2.3094, -2.3447, -2.3798, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.3235, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.1229, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 1.8516,\n 2.0647, 2.2743, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.0207,\n 1.9335, 1.8477, 1.7634, 1.9604, 2.1546, 2.3462, 2.5352, 2.4495,\n 2.6354, 2.8189, 3.0000, 3.1789, 3.0924, 3.0071, 2.9231, 3.0984,\n 3.0151, 3.1879, 3.1052, 3.2757, 3.1937, 3.3619, 3.2806, 3.4466,\n 3.3659, 3.2863, 3.2077, 3.3708, 3.2928, 3.4538, 3.6133, 3.7712,\n 3.6931, 3.8492, 3.7717, 3.9260, 4.0788, 4.0016, 4.1528, 4.0762,\n 4.2258, 4.1497, 4.2977, 4.2222, 4.1475, 4.0736, 4.2196, 4.1461,\n 4.2907, 4.4341, 4.5762, 4.7173, 4.8572, 4.9960, 5.1338, 5.2705,\n 5.1962, 5.3316, 5.4661, 5.5995, 5.7320, 5.8635, 5.9941, 6.1237,\n 6.2524, 6.3803, 6.3054, 6.2312, 6.3580, 6.4838, 6.4101, 6.3369,\n 6.2644, 6.3892, 6.3172, 6.4409, 6.5639, 6.6861, 6.8075, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.5204, 7.4482, 7.5644, 7.6800,\n 7.7949, 7.9091, 8.0227, 8.1356, 8.0636, 8.1758, 8.2874, 8.3984,\n 8.5088, 8.6186, 8.7278, 8.6560, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.1932, 9.2990, 9.4042, 9.5089, 9.6130, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.6069, 9.7095, 9.6394, 9.7415, 9.6719, 9.7735, 9.8746,\n 9.9752, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.5692, 10.6667,\n 10.7637, 10.6944, 10.6256, 10.5573, 10.6538, 10.7500, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jane knocked on Susan's door but she did not answer.\nWith pronoun replaced: Susan did not answer.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "14", + "Fraction of T in Greenlist": "19.2%", + "z-score": "-1.15", + "p value": "0.875", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.2792, -1.0580, -1.1202, -1.1816, -1.2421, -1.3019, -1.3608,\n -1.1488])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "128", + "Fraction of T in Greenlist": "64.3%", + "z-score": "12.8", + "p value": "7.18e-38", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.1305, 2.8868,\n 2.6605, 2.4495, 2.7815, 3.0984, 3.4017, 3.6927, 3.4912, 3.7712,\n 4.0415, 3.8497, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.4222, 5.6183, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 6.1283, 6.3058, 6.4807,\n 6.3472, 6.5196, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.0895, 6.9646, 6.8419, 7.0014, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.6512, 7.5333, 7.6823, 7.5664, 7.4524, 7.3401, 7.2296,\n 7.1207, 7.0133, 6.9076, 6.8034, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.7931, 6.9378, 6.8391, 6.9824, 7.1243, 7.0268, 7.1674, 7.3068,\n 7.4449, 7.5818, 7.7174, 7.8520, 7.9853, 8.1176, 8.2488, 8.3789,\n 8.5079, 8.4116, 8.5396, 8.6667, 8.5715, 8.4774, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 9.1561, 9.0652,\n 9.1851, 9.3042, 9.2143, 9.3326, 9.4501, 9.5668, 9.6828, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.7622, 9.8753, 9.9878, 9.9015,\n 10.0133, 10.1243, 10.0389, 9.9542, 9.8702, 9.9807, 10.0906, 10.1999,\n 10.3085, 10.4164, 10.5238, 10.6306, 10.7367, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 10.9906, 11.0940, 11.1968, 11.2992, 11.2178, 11.3196,\n 11.4209, 11.3402, 11.2602, 11.3610, 11.2816, 11.2028, 11.1245, 11.0468,\n 11.1473, 11.2473, 11.3468, 11.4459, 11.5444, 11.6425, 11.7401, 11.8373,\n 11.7604, 11.8571, 11.7808, 11.7050, 11.8014, 11.7261, 11.8221, 11.9176,\n 12.0127, 12.1073, 12.0327, 12.1270, 12.2209, 12.3143, 12.4074, 12.5000,\n 12.5923, 12.6841, 12.7756, 12.8667, 12.7928, 12.7195, 12.8102])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Beth didn't get angry with Sally, who had cut her off, because she stopped and counted to ten.\nWith pronoun replaced: Sally stopped and counted to ten.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -0.7559, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.9631, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -0.8372, -0.8889, -0.9401, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -0.9115, -0.9584, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.6231, -1.6641, -1.5206, -1.5617, -1.6025, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.6230, -1.6630, -1.7028, -1.5637, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.3131, -1.3530, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.5159, -1.3859, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.5220, -1.5592, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "110", + "Fraction of T in Greenlist": "55.3%", + "z-score": "9.86", + "p value": "3e-23", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.8819, 1.2309, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.4757, 1.3480, 1.2247,\n 1.1055, 0.9901, 1.2687, 1.5396, 1.8034, 2.0605, 1.9415, 2.1909,\n 2.0738, 1.9599, 2.2011, 2.0889, 1.9795, 1.8728, 1.7685, 1.6667,\n 1.5671, 1.7963, 2.0211, 2.2418, 2.4585, 2.3570, 2.5690, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.2916, 2.1997, 2.1094, 2.3094,\n 2.5064, 2.7005, 2.8919, 2.8006, 2.9887, 2.8983, 2.8093, 2.9938,\n 2.9057, 2.8189, 2.7333, 2.6491, 2.5660, 2.4841, 2.6632, 2.8402,\n 3.0151, 3.1879, 3.3587, 3.5277, 3.6947, 3.8600, 4.0234, 3.9389,\n 4.1003, 4.2601, 4.4182, 4.3339, 4.4901, 4.6448, 4.7980, 4.9497,\n 4.8655, 5.0156, 4.9322, 4.8497, 4.9980, 5.1450, 5.0630, 5.2085,\n 5.3526, 5.4956, 5.6373, 5.7778, 5.6959, 5.8351, 5.7540, 5.6737,\n 5.8114, 5.7318, 5.8684, 6.0038, 5.9247, 6.0590, 5.9806, 5.9029,\n 5.8260, 5.7498, 5.8825, 6.0143, 5.9386, 5.8635, 5.9941, 6.1237,\n 6.2524, 6.3803, 6.5072, 6.4322, 6.5582, 6.4838, 6.4101, 6.5350,\n 6.6591, 6.7823, 6.9048, 7.0265, 7.1474, 7.0737, 7.1938, 7.3131,\n 7.4317, 7.5495, 7.6667, 7.7831, 7.7096, 7.8253, 7.9403, 7.8673,\n 7.9816, 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.6537, 8.7636,\n 8.6908, 8.8000, 8.9086, 9.0167, 8.9444, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.3328, 9.2619, 9.3665, 9.4707, 9.4002,\n 9.5038, 9.6069, 9.7095, 9.8116, 9.9132, 9.8431, 9.9442, 9.8746,\n 9.8054, 9.9060, 9.8373, 9.9374, 10.0371, 9.9687, 10.0679, 10.0000,\n 9.9325, 9.8654, 9.7987, 9.8974, 9.9957, 9.9294, 9.8635])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: No one joins Facebook to be sad and lonely. But a new study from the University of Wisconsin psychologist George Lincoln argues that that's exactly how it makes us feel.\nWith pronoun replaced: That's exactly how Facebook makes us feel.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "159", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "36.5%", + "z-score": "3.34", + "p value": "0.000415", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165, 1.3472,\n 1.0954, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142, 1.2702, 1.5852, 1.4444,\n 1.7457, 2.0370, 1.8974, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 1.9245,\n 2.1831, 2.0605, 1.9415, 1.8257, 2.0738, 1.9599, 1.8489, 2.0889, 2.3238,\n 2.2133, 2.4422, 2.6667, 2.5568, 2.4495, 2.3445, 2.2418, 2.4585, 2.3570,\n 2.2576, 2.1602, 2.3706, 2.2743, 2.1798, 2.3851, 2.5873, 2.4930, 2.6914,\n 2.8868, 2.7928, 2.7005, 2.6098, 2.5205, 2.7107, 2.6222, 2.5352, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.6491, 2.8292, 2.7456, 2.9231, 3.0984, 3.0151,\n 2.9329, 2.8518, 2.7717, 2.9433, 2.8638, 2.7854, 2.7080, 2.6316, 2.7995,\n 2.9656, 2.8893, 2.8138, 2.7393, 2.6656, 2.8284, 2.7552, 2.6828, 2.6112,\n 2.5403, 2.7001, 2.8583, 2.7875, 2.7175, 2.6481, 2.5796, 2.7349, 2.6667,\n 2.5991, 2.5322, 2.6852, 2.6186, 2.5527, 2.7037, 2.8534, 2.7875, 2.9357,\n 3.0827, 3.0168, 2.9515, 2.8868, 2.8226, 2.9673, 2.9035, 2.8402, 2.7775,\n 2.7153, 2.8577, 2.9991, 2.9369, 2.8753, 2.8141, 2.7534, 2.8928, 2.8324,\n 2.7724, 2.7129, 2.8505, 2.7913, 2.7325, 2.8687, 3.0039, 2.9451, 3.0792,\n 3.2124, 3.1536, 3.0952, 3.0373, 2.9798, 3.1113, 3.0540, 2.9971, 2.9406,\n 3.0706, 3.0143, 2.9584, 3.0872, 3.2152, 3.3424])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "186", + "Fraction of T in Greenlist": "93.5%", + "z-score": "22.3", + "p value": "1.64e-110", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 5.4175, 5.6622, 5.8989, 6.1283,\n 6.3509, 6.5672, 6.7778, 6.9830, 7.1832, 7.3786, 7.5697, 7.7567,\n 7.9398, 8.1192, 8.2952, 8.4678, 8.2577, 8.4293, 8.5979, 8.7636,\n 8.9265, 9.0869, 9.2447, 9.4002, 9.5534, 9.7043, 9.8532, 10.0000,\n 10.1449, 10.2879, 10.4290, 10.5685, 10.7062, 10.8423, 10.9768, 11.1098,\n 11.2414, 11.3715, 11.5002, 11.6276, 11.7536, 11.8784, 11.7110, 11.8357,\n 11.9591, 12.0814, 12.2025, 12.3225, 12.4414, 12.5592, 12.6760, 12.7918,\n 12.9066, 13.0204, 13.1333, 13.2453, 13.3564, 13.4666, 13.5760, 13.6845,\n 13.7923, 13.8992, 13.7518, 13.8587, 13.9648, 14.0701, 14.1747, 14.2786,\n 14.3818, 14.4842, 14.5860, 14.6871, 14.7875, 14.8873, 14.9864, 15.0849,\n 15.1828, 15.2801, 15.3769, 15.4730, 15.5685, 15.6635, 15.7580, 15.8519,\n 15.9452, 16.0381, 16.1304, 16.2222, 16.0923, 16.1842, 16.2755, 16.3663,\n 16.4567, 16.5466, 16.6360, 16.7250, 16.8135, 16.9015, 16.9891, 17.0763,\n 17.1630, 17.2494, 17.3353, 17.4208, 17.5059, 17.5906, 17.6749, 17.7588,\n 17.6390, 17.7229, 17.8065, 17.8897, 17.9725, 18.0549, 18.1370, 18.2187,\n 18.3001, 18.3811, 18.4618, 18.5421, 18.6221, 18.7018, 18.7811, 18.8601,\n 18.9388, 19.0172, 19.0952, 19.1730, 19.2504, 19.3276, 19.4044, 19.4810,\n 19.5572, 19.6332, 19.5234, 19.5994, 19.6751, 19.7506, 19.8257, 19.9006,\n 19.9752, 20.0495, 20.1236, 20.1974, 20.2709, 20.3442, 20.4173, 20.4900,\n 20.5626, 20.6348, 20.7069, 20.7786, 20.8502, 20.9215, 20.8180, 20.8893,\n 20.9604, 21.0313, 21.1019, 21.1723, 21.2425, 21.3124, 21.3822, 21.4517,\n 21.5210, 21.5900, 21.6589, 21.7275, 21.7960, 21.8642, 21.9322, 22.0000,\n 22.0676, 22.1350, 22.0368, 22.1043, 22.1715, 22.2385, 22.3054])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The man couldn't lift his son because he was so heavy.\nWith pronoun replaced: The son was so heavy.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "26", + "Fraction of T in Greenlist": "16.2%", + "z-score": "-2.56", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -2.2361, -2.3094,\n -2.3805, -2.4495, -1.9868, -2.0656, -2.1418, -2.2156, -2.2873, -2.3570,\n -2.4249, -2.0381, -2.1111, -2.1822, -2.2514, -2.3190, -2.3850, -2.4495,\n -2.5126, -2.5744, -2.6349, -2.3094, -1.9932, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -1.9369, -2.0000,\n -2.0620, -2.1229, -2.1828, -2.2418, -2.2998, -2.3570, -2.1019, -2.1602,\n -2.2177, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.5205, -2.5717, -2.6222, -2.6722, -2.7217,\n -2.7705, -2.5504, -2.6000, -2.6491, -2.6976, -2.7456, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.4715, -2.2678, -2.3170, -2.3658, -2.4140, -2.4618,\n -2.5092, -2.3126, -2.3604, -2.4077, -2.4546, -2.5011, -2.5471, -2.5927,\n -2.6379, -2.6828, -2.4951, -2.5403, -2.5852, -2.6296, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.4885, -2.5322, -2.5756, -2.4004,\n -2.4441, -2.4874, -2.5304, -2.5731, -2.6154, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.4467, -2.2813, -2.3238, -2.3660, -2.2030, -2.2454,\n -2.2875, -2.3293, -2.1691, -2.2111, -2.2528, -2.2943, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.4225, -2.4623, -2.3094,\n -2.3494, -2.3891, -2.4286, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.6605, -2.6984, -2.7361, -2.7735, -2.6264, -2.6640, -2.5183, -2.5560])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -2.1602, -1.6398, -1.7321,\n -1.8204, -1.3608, -1.4570, -1.5492, -1.1339, -0.7385, -0.3612, 0.0000,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.7628, 2.0412,\n 1.9096, 1.7823, 2.0494, 1.9245, 2.1831, 2.0605, 1.9415, 2.1909,\n 2.0738, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.7791, 3.0000,\n 2.8868, 2.7761, 2.6679, 2.8823, 3.0929, 3.2998, 3.5032, 3.3947,\n 3.5942, 3.4873, 3.3824, 3.5777, 3.7700, 3.9595, 3.8552, 4.0415,\n 3.9386, 4.1219, 4.0205, 4.2008, 4.3788, 4.5544, 4.4537, 4.6268,\n 4.5274, 4.6981, 4.8667, 4.7683, 4.6715, 4.8375, 5.0017, 4.9058,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.4160,\n 5.5691, 5.4772, 5.3865, 5.5377, 5.6875, 5.8358, 5.7458, 5.8926,\n 5.8035, 5.9488, 5.8606, 6.0044, 6.1470, 6.2883, 6.2008, 6.3408,\n 6.2541, 6.3928, 6.5303, 6.4444, 6.3595, 6.4957, 6.4116, 6.5465,\n 6.6804, 6.5970, 6.7298, 6.6471, 6.5653, 6.6968, 6.8274, 6.9570,\n 6.8757, 7.0043, 6.9237, 7.0513, 6.9714, 7.0980, 7.2236, 7.3485,\n 7.2691, 7.3930, 7.3143, 7.2363, 7.1590, 7.0823, 7.2051, 7.1291,\n 7.0537, 7.1755, 7.1007, 7.0265, 7.1474, 7.2675, 7.3869, 7.5056,\n 7.6235, 7.7407, 7.8571, 7.9729, 7.8988, 8.0139, 8.1282, 8.2420,\n 8.1683, 8.0952, 8.2082, 8.1356, 8.2479, 8.1758, 8.1043, 8.2158,\n 8.1448, 8.2557, 8.1851, 8.1150, 8.2252, 8.3349, 8.4439, 8.5524,\n 8.4826, 8.4133, 8.3446, 8.4523, 8.5595, 8.6662, 8.7724, 8.7039,\n 8.8094, 8.7414, 8.6738, 8.7788, 8.8832, 8.9872, 8.9199, 9.0233,\n 8.9565, 9.0593, 8.9929, 9.0952, 9.1971, 9.2986, 9.2324, 9.3333,\n 9.2676, 9.2022, 9.1372, 9.0726, 9.1730, 9.1088, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Susan knew that Ann's son had been in a car accident, so she told her about it.\nWith pronoun replaced: Ann told her about it.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "34", + "Fraction of T in Greenlist": "17.1%", + "z-score": "-2.58", + "p value": "0.995", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.6654, -1.4142, -1.1677, -0.9258,\n -0.9941, -1.0613, -0.8268, -0.8944, -0.9610, -1.0265, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.5333, -1.5894, -1.6449, -1.6997, -1.7538, -1.8074,\n -1.6038, -1.6577, -1.7111, -1.7638, -1.8161, -1.6187, -1.6713, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -2.1372, -1.9640,\n -2.0096, -1.8385, -1.8843, -1.9298, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.9107, -1.9545, -1.9980, -2.0412,\n -2.0841, -2.1268, -2.1691, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.2197, -2.2608, -2.3016, -2.3422, -2.3825, -2.2287, -2.2692, -2.1170,\n -2.1576, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.2083, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.2578, -2.2966, -2.3351, -2.1909,\n -2.2296, -2.2680, -2.3063, -2.3443, -2.3822, -2.4198, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.4283, -2.2892, -2.3264, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.5893, -2.6247, -2.6599, -2.5265, -2.5618, -2.5969, -2.6319, -2.6667,\n -2.7013, -2.5700, -2.6047, -2.6393, -2.5092, -2.5439, -2.5784])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.9802, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.4915, 3.7524, 4.0056, 3.8367, 3.6742,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.2223, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.0623, 4.9316, 4.8038, 4.6790, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.3578, 6.2505, 6.1450, 6.0410, 5.9386,\n 5.8377, 5.7382, 5.6401, 5.7955, 5.9491, 6.1012, 6.2517, 6.4008,\n 6.5483, 6.4510, 6.5970, 6.5008, 6.6454, 6.7886, 6.9305, 7.0711,\n 7.2104, 7.1152, 7.0211, 7.1591, 7.2960, 7.4316, 7.5661, 7.4730,\n 7.6064, 7.7387, 7.8699, 7.7778, 7.9079, 8.0370, 7.9460, 8.0741,\n 7.9839, 8.1111, 8.0219, 7.9336, 7.8463, 7.7598, 7.6742, 7.5895,\n 7.5056, 7.4225, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.3605, 8.4788, 8.5964, 8.7133,\n 8.6321, 8.5516, 8.6677, 8.7831, 8.8978, 9.0117, 8.9319, 9.0452,\n 9.1577, 9.2697, 9.1905, 9.3017, 9.4124, 9.3338, 9.4438, 9.3659,\n 9.4752, 9.3979, 9.3212, 9.2450, 9.1694, 9.0944, 9.0200, 8.9461,\n 8.8728, 8.9815, 9.0895, 9.1970, 9.3040, 9.4103, 9.5161, 9.4432,\n 9.5485, 9.4761, 9.5808, 9.6850, 9.7886, 9.8918, 9.9944, 9.9224,\n 9.8510, 9.9531, 10.0547, 10.1558, 10.2565, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.5128, 10.6111, 10.5410, 10.6389, 10.5692, 10.6667,\n 10.5974, 10.5286, 10.4603, 10.3923, 10.3248, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: When Tommy dropped his ice cream, Timmy giggled, so father gave him a stern look.\nWith pronoun replaced: Father gave Timmy a stern look.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -1.7942, -1.8500, -1.9052,\n -1.6893, -1.7450, -1.5333, -1.5894, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.1323,\n -1.1832, -1.0094, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -0.9409, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.0206,\n -1.0675, -1.1140, -0.9584, -1.0050, -0.8511, -0.8978, -0.7454, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.5855, -0.4376, -0.4845, -0.5311, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.7833, -0.8268, -0.8700, -0.7303,\n -0.7735, -0.6351, -0.6783, -0.5410, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.5579, -0.5991, -0.6402, -0.6810,\n -0.7216, -0.5927, -0.6333, -0.6737, -0.5459, -0.5864, -0.4595, -0.5000,\n -0.3740, -0.2487, -0.2894, -0.3299, -0.3702, -0.2462, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.7735,\n 6.0212, 5.7155, 5.9604, 6.1968, 5.9214, 6.1546, 5.8989, 6.1283,\n 6.3509, 6.1143, 5.8889, 5.6737, 5.4678, 5.6921, 5.9106, 6.1237,\n 6.3317, 6.5350, 6.7337, 6.5433, 6.3594, 6.5561, 6.7489, 6.9378,\n 7.1232, 7.3051, 7.4838, 7.6594, 7.8320, 7.6613, 7.8320, 8.0000,\n 7.8355, 8.0017, 7.8420, 8.0064, 8.1684, 8.0139, 7.8628, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.3589, 8.5105, 8.3716,\n 8.2353, 8.3859, 8.5347, 8.6817, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.4000, 9.5366, 9.4087, 9.5443, 9.4188, 9.5534,\n 9.6867, 9.5637, 9.4425, 9.3231, 9.2055, 9.3386, 9.4705, 9.6011,\n 9.7306, 9.8590, 9.9863, 9.8716, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.5027, 10.6232, 10.7429, 10.6329, 10.7518, 10.8699,\n 10.7616, 10.8790, 10.7722, 10.6667, 10.5623, 10.6793, 10.5763, 10.6927,\n 10.5909, 10.7066, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.1734,\n 11.2846, 11.3950, 11.2966, 11.1991, 11.1026, 11.0070, 10.9123, 11.0227,\n 11.1324, 11.2414, 11.3497, 11.2564, 11.3642, 11.4714, 11.5779, 11.6837,\n 11.5917, 11.6971, 11.8018, 11.7108, 11.8151, 11.7249, 11.8287, 11.7395,\n 11.8427, 11.9455, 12.0476, 12.1492, 12.0611, 12.1622, 12.0749, 12.1756,\n 12.2758, 12.3754, 12.2891, 12.2034, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.1489, 12.2474, 12.3455, 12.2627, 12.3603, 12.2782, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.4065, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.0171, 12.9391,\n 13.0314, 12.9540, 13.0460, 13.1376, 13.2288, 13.3196, 13.2429, 13.3333,\n 13.4234, 13.5131, 13.4371, 13.5265, 13.4510, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: There is a pillar between me and the stage, and I can't see around it.\nWith pronoun replaced: I can't see around the pillar.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -2.3163, -2.3772, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.4495, -2.5062, -2.5621, -2.6171, -2.3570, -2.4133, -2.4689,\n -2.5236, -2.5775, -2.6308, -2.6833, -2.7351, -2.7863, -2.8368, -2.8868,\n -2.6496, -2.7005, -2.7508, -2.5205, -2.2937, -2.3462, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -1.9081, -1.9612, -2.0137, -1.8074,\n -1.8604, -1.9127, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.0247, -2.0732, -2.1213,\n -1.9345, -1.9829, -2.0309, -2.0785, -2.1256, -2.1723, -2.2186, -2.2646,\n -2.3101, -2.1309, -2.1768, -2.2222, -2.0461, -2.0918, -1.9180, -1.9640,\n -2.0096, -2.0548, -2.0997, -2.1442, -2.1884, -2.2323, -2.0641, -2.1082,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.7823,\n -1.8251, -1.8676, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -1.8324, -1.8732,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -1.8656, -1.9052, -1.7636, -1.8033, -1.8428, -1.7028, -1.7424, -1.7817,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "50.0%", + "z-score": "2.31", + "p value": "0.0105", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Look! There is a minnow swimming right below that duck! It had better get away to safety fast!\nWith pronoun replaced: The duck had better get away to safety fast!\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.5922, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.9567, -0.7939, -0.8433,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.6713, -0.7201, -0.7685, -0.8165,\n -0.6608, -0.7089, -0.7566, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -0.8385, -0.8847, -0.9304, -0.9759, -1.0211, -1.0659, -1.1105, -0.9623,\n -1.0069, -1.0512, -1.0952, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.3687, -1.4087, -1.4485, -1.3131, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.2049, -1.2445, -1.2839, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.1399, -1.1790, -1.2179, -1.0890, -1.1279, -1.1667,\n -1.2052, -1.2435, -1.1163, -1.1547, -1.1929, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "198", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.6%", + "z-score": "10.3", + "p value": "5.47e-25", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 1.3093, 1.1793, 1.4757, 1.3480, 1.2247,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.8034, 1.6859, 1.9415, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.0889, 2.3238, 2.5538, 2.7791, 2.6667,\n 2.8868, 3.1027, 2.9913, 2.8823, 3.0929, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.5847, 4.4809, 4.3788, 4.2784, 4.1797, 4.3546,\n 4.5274, 4.6981, 4.6000, 4.7683, 4.6715, 4.5760, 4.4820, 4.6476,\n 4.8113, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.4772, 5.6286, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.2725, 6.4153, 6.3248, 6.2354, 6.1470, 6.2883, 6.4283, 6.5672,\n 6.7049, 6.6171, 6.7536, 6.6667, 6.8019, 6.9361, 7.0692, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.0759, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.5484, 7.6734, 7.7976, 7.9209, 8.0434, 7.9608,\n 8.0824, 8.0006, 7.9196, 7.8393, 7.7597, 7.8803, 8.0002, 8.1192,\n 8.2375, 8.3550, 8.4718, 8.3927, 8.5088, 8.4303, 8.5456, 8.6603,\n 8.5824, 8.6963, 8.8095, 8.7323, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.7284, 8.8396, 8.7647, 8.6903, 8.8008, 8.9107, 8.8369, 8.9461,\n 9.0548, 8.9815, 8.9086, 8.8364, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.1932, 9.2990, 9.2276, 9.1567, 9.2619, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.5369, 9.6394, 9.7415, 9.6719, 9.6028, 9.7043,\n 9.8054, 9.9060, 9.8373, 9.9374, 9.8691, 9.9687, 9.9008, 10.0000,\n 9.9325, 10.0312, 10.1295, 10.2273, 10.3248, 10.2576])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bernard, who had not told the government official that he was less than 21 when he filed for a homestead claim, did not consider that he had done anything dishonest. Still, anyone who knew that he was 19 years old could take his claim away from him.\nWith pronoun replaced: Anyone who knew that he was 19 years old could take his claim away from anyone.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "16", + "Fraction of T in Greenlist": "21.9%", + "z-score": "-0.608", + "p value": "0.728", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.2641, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "87", + "Fraction of T in Greenlist": "43.7%", + "z-score": "6.1", + "p value": "5.36e-10", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 3.4641, 3.2206, 2.9938,\n 2.7815, 3.0984, 2.8977, 3.2004, 3.0096, 2.8284, 2.6558, 2.9439, 3.2222,\n 3.4915, 3.7524, 4.0056, 4.2515, 4.0825, 4.3217, 4.1586, 4.0012, 3.8490,\n 3.7017, 3.5590, 3.7905, 3.6515, 3.5165, 3.3853, 3.2577, 3.1334, 3.0123,\n 2.8943, 2.7791, 3.0000, 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.9856,\n 3.1918, 3.0861, 2.9824, 3.1840, 3.3824, 3.5777, 3.4743, 3.6662, 3.8552,\n 3.7528, 3.9386, 4.1219, 4.0205, 3.9208, 4.1008, 4.2784, 4.4537, 4.3546,\n 4.5274, 4.4296, 4.3333, 4.2385, 4.1451, 4.3146, 4.2222, 4.3894, 4.2981,\n 4.2080, 4.1192, 4.2836, 4.4462, 4.6070, 4.7662, 4.9237, 5.0795, 4.9904,\n 5.1444, 5.0562, 4.9691, 4.8830, 4.7980, 4.7140, 4.8655, 4.7823, 4.7001,\n 4.6188, 4.5384, 4.4590, 4.3804, 4.3027, 4.2258, 4.3740, 4.2977, 4.2222,\n 4.1475, 4.0736, 4.2196, 4.3644, 4.5079, 4.4341, 4.3609, 4.5029, 4.6437,\n 4.7834, 4.7104, 4.8488, 4.9862, 4.9135, 4.8414, 4.7700, 4.9058, 5.0406,\n 5.1744, 5.1031, 5.2358, 5.1650, 5.0948, 5.0252, 4.9562, 5.0873, 5.0187,\n 5.1488, 5.0806, 5.0130, 4.9460, 5.0747, 5.2025, 5.3295, 5.4557, 5.5811,\n 5.7056, 5.6383, 5.7619, 5.6949, 5.6285, 5.5626, 5.4971, 5.4322, 5.5544,\n 5.4899, 5.4257, 5.3621, 5.2989, 5.2362, 5.1739, 5.1121, 5.0507, 5.1711,\n 5.1100, 5.0494, 4.9891, 4.9292, 5.0485, 5.1671, 5.2850, 5.2251, 5.1657,\n 5.2827, 5.3991, 5.5149, 5.4554, 5.5705, 5.6849, 5.6256, 5.5668, 5.5082,\n 5.6217, 5.7347, 5.8470, 5.7885, 5.9002, 5.8420, 5.7841, 5.7266, 5.6695,\n 5.7802, 5.7233, 5.8333, 5.7766, 5.7203, 5.6643, 5.7735, 5.8822, 5.9905,\n 6.0982])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. They were very much in the majority.\nWith pronoun replaced: The sponsors were very much in the majority.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "68", + "Fraction of T in Greenlist": "34.2%", + "z-score": "2.99", + "p value": "0.00141", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.6398, -1.7321,\n -1.8204, -1.9052, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.8856,\n -1.9630, -1.5852, -1.6667, -1.7457, -1.8226, -1.8974, -1.5554, -1.6330,\n -1.7086, -1.3862, -1.4639, -1.5396, -1.6136, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.1189, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.0613, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.5443,\n -0.6082, -0.4027, -0.2000, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.1217, 0.3026, 0.4815, 0.4191, 0.3573, 0.2962, 0.4714,\n 0.6448, 0.8165, 0.7543, 0.6928, 0.8617, 1.0290, 1.1946, 1.1323,\n 1.0705, 1.0094, 0.9488, 0.8889, 0.8295, 0.7707, 0.7124, 0.8729,\n 1.0319, 0.9733, 1.1306, 1.2865, 1.4412, 1.5945, 1.7465, 1.6865,\n 1.6271, 1.7772, 1.9261, 2.0739, 2.0140, 1.9545, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.7522, 1.8953, 1.8385, 1.7823,\n 1.7264, 1.8676, 2.0078, 2.1470, 2.0907, 2.0349, 2.1726, 2.3094,\n 2.4453, 2.3891, 2.3333, 2.2780, 2.2230, 2.1685, 2.1143, 2.0605,\n 2.0071, 2.1401, 2.2723, 2.2188, 2.3500, 2.2966, 2.4267, 2.5560,\n 2.6846, 2.6309, 2.5776, 2.7050, 2.8316, 2.9575, 2.9040, 2.8508,\n 2.7979, 2.7454, 2.6932, 2.6414, 2.5898, 2.5386, 2.6623, 2.7852,\n 2.7340, 2.6830, 2.6323, 2.7541, 2.8752, 2.9957, 2.9448, 2.8943,\n 3.0138, 3.1327, 3.2509, 3.2002, 3.1497, 3.0995, 3.0496, 3.0000,\n 2.9507, 2.9016, 2.8528, 2.9692, 3.0851, 3.0363, 2.9877])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "109", + "Fraction of T in Greenlist": "54.8%", + "z-score": "9.7", + "p value": "1.51e-22", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 4.6291, 4.9193, 5.1962,\n 5.4611, 5.1711, 5.4306, 5.1640, 4.9135, 5.1698, 4.9358, 4.7140,\n 4.5033, 4.3027, 4.1111, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 6.0412, 5.8797, 5.7229, 5.9186, 5.7664, 5.9588, 5.8108, 6.0000,\n 5.8560, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.5442, 5.4259, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 6.1492, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.5033, 6.6541, 6.8034, 6.9511, 7.0973, 6.9945, 6.8931,\n 7.0379, 6.9378, 6.8391, 6.7416, 6.6454, 6.7886, 6.9305, 7.0711,\n 6.9759, 6.8819, 6.7890, 6.9282, 6.8364, 6.9743, 6.8834, 7.0201,\n 6.9303, 6.8414, 6.7536, 6.8889, 7.0231, 6.9361, 7.0692, 6.9830,\n 6.8977, 7.0296, 7.1605, 7.2904, 7.2058, 7.1220, 7.0391, 7.1678,\n 7.0857, 7.2134, 7.1319, 7.2587, 7.1779, 7.0980, 7.0187, 7.1443,\n 7.2691, 7.3930, 7.3143, 7.2363, 7.3592, 7.4813, 7.6026, 7.5251,\n 7.4483, 7.5687, 7.4924, 7.6120, 7.7308, 7.8489, 7.9663, 8.0829,\n 8.1988, 8.3140, 8.4286, 8.3526, 8.4664, 8.5796, 8.6921, 8.8039,\n 8.9151, 8.8396, 8.9502, 8.8752, 8.9851, 8.9107, 8.8369, 8.9461,\n 8.8728, 8.8000, 8.7278, 8.6560, 8.7646, 8.8726, 8.9800, 8.9087,\n 8.8379, 8.9447, 8.8744, 8.9806, 9.0863, 9.1915, 9.2961, 9.4002,\n 9.5038, 9.4338, 9.3642, 9.2952, 9.2265, 9.1584, 9.2613, 9.3638,\n 9.4658, 9.3980, 9.3306, 9.2637, 9.3651, 9.4661, 9.5666, 9.6667,\n 9.7663, 9.8654, 9.7987, 9.7325, 9.8311, 9.7653, 9.6998])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I can't cut that tree down with that axe; it is too thick.\nWith pronoun replaced: The tree is too thick.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "60", + "Fraction of T in Greenlist": "30.2%", + "z-score": "1.68", + "p value": "0.0467", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.5185, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.6644, 0.9366, 1.2019, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.7579, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.5963, 0.5175, 0.4399, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.8165,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 1.1043, 1.2910,\n 1.2189, 1.1476, 1.0773, 1.0079, 1.1898, 1.1206, 1.0523, 0.9847,\n 0.9180, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 1.0070, 1.1785,\n 1.1138, 1.0498, 1.2185, 1.1547, 1.0915, 1.0290, 0.9671, 1.1323,\n 1.2959, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.3700, 1.5275,\n 1.4664, 1.4059, 1.3460, 1.2865, 1.4412, 1.3819, 1.3231, 1.2649,\n 1.2072, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316, 1.2808, 1.4289,\n 1.3725, 1.3166, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423, 1.3862,\n 1.5291, 1.6710, 1.6160, 1.5614, 1.5073, 1.4535, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.5187, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.4792, 1.4284, 1.3779, 1.5110, 1.6432,\n 1.5926, 1.5423, 1.6732, 1.6230, 1.5731, 1.5236, 1.4743, 1.6036,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.7119, 1.6632, 1.7894, 1.9149,\n 1.8660, 1.8175, 1.7693, 1.7213, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.6843, 1.6378, 1.5916, 1.7128, 1.8333,\n 1.7870, 1.7410, 1.8605, 1.8145, 1.7688, 1.7233, 1.6780])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "115", + "Fraction of T in Greenlist": "57.8%", + "z-score": "10.7", + "p value": "6.18e-27", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.0911, 0.8165,\n 1.3472, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 1.6082, 1.8974, 2.1776, 2.4495,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.8765, 3.7559, 3.9614, 4.1633, 4.3618, 4.2426, 4.1260, 4.0119,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.6571, 4.8394, 4.7281, 4.9075,\n 5.0844, 5.2590, 5.1490, 5.0410, 5.2129, 5.3825, 5.2760, 5.1711,\n 5.3383, 5.5035, 5.4000, 5.5630, 5.7242, 5.8835, 5.7812, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.4993, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.4059, 6.3122, 6.4566, 6.5997,\n 6.7414, 6.6486, 6.7890, 6.9282, 7.0662, 6.9743, 6.8834, 7.0201,\n 7.1556, 7.0657, 7.2001, 7.3333, 7.4655, 7.3765, 7.2884, 7.4194,\n 7.5494, 7.6785, 7.8065, 7.7192, 7.6328, 7.7598, 7.8859, 7.8003,\n 7.7155, 7.6315, 7.7566, 7.8808, 7.7976, 7.9209, 8.0434, 8.1650,\n 8.0824, 8.0006, 8.1214, 8.2413, 8.1602, 8.2793, 8.3977, 8.5153,\n 8.4348, 8.3550, 8.4718, 8.5879, 8.7033, 8.8179, 8.7388, 8.6603,\n 8.5824, 8.6963, 8.6190, 8.5424, 8.6556, 8.7681, 8.8800, 8.8039,\n 8.9151, 9.0257, 9.1357, 9.0601, 8.9851, 9.0944, 9.2032, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.4103, 9.5161, 9.6214,\n 9.7261, 9.6532, 9.5808, 9.6850, 9.7886, 9.7167, 9.8198, 9.7483,\n 9.8510, 9.7800, 9.8821, 9.8116, 9.9132, 10.0143, 10.1149, 10.0448,\n 10.1450, 10.0753, 10.1750, 10.2743, 10.3730, 10.4713, 10.4021, 10.3333,\n 10.4312, 10.5286, 10.4603, 10.3923, 10.4893, 10.5859, 10.6820])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The large ball crashed right through the table because it was made of styrofoam.\nWith pronoun replaced: The large ball was made of styrofoam.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -1.2222, -1.3093, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -1.2339, -1.3112, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.4003, -1.4596, -1.5181, -1.5759, -1.6330,\n -1.6893, -1.4765, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.8843, -1.7154, -1.7614, -1.8071, -1.6407, -1.4757,\n -1.5221, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.7566, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -2.0083,\n -2.0476, -2.0866, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -2.1381,\n -2.1762, -2.0369, -2.0751, -1.9370, -1.7997, -1.8383, -1.8767, -1.9149,\n -1.9528, -1.9906, -2.0282, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.2740, -2.1444, -2.1801, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.8898, 1.7233, 1.5650, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.3190, 2.1776, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.7417, 3.6098, 3.4816, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.8431, 3.7273, 3.9284, 3.8146, 4.0119,\n 4.2060, 4.0937, 4.2844, 4.1740, 4.3614, 4.5461, 4.4371, 4.3301,\n 4.5115, 4.6904, 4.8669, 5.0410, 4.9348, 4.8305, 4.7278, 4.6268,\n 4.5274, 4.6981, 4.8667, 4.7683, 4.6715, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.2338, 5.3865, 5.2970, 5.4480, 5.5976, 5.7458, 5.8926,\n 5.8035, 5.7155, 5.8606, 5.7735, 5.6874, 5.6023, 5.5181, 5.6614,\n 5.8034, 5.9442, 5.8605, 6.0000, 5.9171, 6.0553, 6.1924, 6.1101,\n 6.2459, 6.1644, 6.2991, 6.4327, 6.3517, 6.2716, 6.4040, 6.5354,\n 6.6658, 6.7952, 6.7155, 6.6365, 6.5583, 6.4807, 6.4039, 6.5320,\n 6.6591, 6.5828, 6.5072, 6.6332, 6.5582, 6.6833, 6.8076, 6.9310,\n 6.8564, 6.7823, 6.7089, 6.6361, 6.7584, 6.8799, 7.0007, 6.9282,\n 7.0481, 6.9762, 7.0952, 7.2136, 7.3312, 7.4482, 7.3765, 7.3054,\n 7.4215, 7.3508, 7.2807, 7.2111, 7.1420, 7.2572, 7.3717, 7.4855,\n 7.4168, 7.5299, 7.4616, 7.5740, 7.6859, 7.6179, 7.7291, 7.8397,\n 7.7720, 7.8820, 7.8147, 7.7480, 7.8572, 7.9659, 8.0741, 8.1817,\n 8.1151, 8.0490, 7.9833, 7.9181, 7.8533, 7.9601, 8.0663, 8.0018,\n 7.9377, 8.0433, 7.9796, 8.0847, 8.1892, 8.2933, 8.2298, 8.1667,\n 8.1039, 8.0416, 8.1449, 8.2479, 8.3503, 8.2882, 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like light bulbs.\nWith pronoun replaced: The lemon trees came out looking more like light bulbs.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.5396, -1.2339, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.2041, -1.2599, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.0954, -1.1499, -1.2039, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.4863, -1.5360, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.3333, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.2865, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.3122, -1.3590, -1.4056, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.2597, -1.3036, -1.3472,\n -1.3904, -1.4335, -1.4762, -1.5187, -1.5608, -1.6028, -1.6444, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.9291, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.7636, -1.8033, -1.8428, -1.8821, -1.9211, -1.9599,\n -1.9985, -1.8598, -1.8985, -1.9370, -1.9753, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.6444, -1.6830, -1.7213, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.7218, -1.7592, -1.7964, -1.8333,\n -1.8701, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "117", + "Fraction of T in Greenlist": "58.8%", + "z-score": "11", + "p value": "1.72e-28", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.6019, 0.9428,\n 1.2702, 1.5852, 1.8889, 2.1822, 2.0370, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.5744, 2.8301, 2.6943, 2.9424, 2.8098, 2.6811, 2.9212,\n 3.1558, 3.0290, 3.2577, 3.1334, 3.3566, 3.5753, 3.4528, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.8177, 5.0034, 4.8857, 5.0684, 4.9528, 5.1326, 5.3100, 5.1962,\n 5.3709, 5.5432, 5.7133, 5.8812, 6.0469, 6.2106, 6.0982, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 5.9874, 5.8835, 6.0410, 6.1968,\n 6.3509, 6.2483, 6.4006, 6.5514, 6.7006, 6.8483, 6.9945, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 7.7778, 7.9079, 7.8168, 7.7268, 7.8558,\n 7.9839, 8.1111, 8.0219, 8.1481, 8.2733, 8.1850, 8.3093, 8.4327,\n 8.5553, 8.6770, 8.5896, 8.7104, 8.6238, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.6083, 8.7267, 8.6433, 8.7610, 8.8778, 8.7952, 8.7133,\n 8.8294, 8.9448, 8.8636, 8.9783, 9.0923, 9.2055, 9.3181, 9.2376,\n 9.3495, 9.4608, 9.5714, 9.4916, 9.6016, 9.5224, 9.4438, 9.3659,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.5381, 9.6456, 9.5695, 9.4939,\n 9.6008, 9.7072, 9.8131, 9.7380, 9.8433, 9.9481, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.2872, 10.2132, 10.3154, 10.2419, 10.1690, 10.2706,\n 10.3717, 10.4724, 10.3999, 10.5001, 10.4281, 10.3566, 10.4563, 10.5556,\n 10.6544, 10.7527, 10.6817, 10.7795, 10.7090, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.6944, 10.7910, 10.8872, 10.9829, 10.9141, 11.0094])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Madonna fired her trainer because she slept with her boyfriend.\nWith pronoun replaced: Madonna slept with her boyfriend.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415, 4.3409, 4.0825,\n 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.7712, 3.5796, 3.3968, 3.2222,\n 3.0551, 2.8947, 3.1623, 3.0072, 2.8577, 2.7136, 2.5744, 2.4398, 2.3094,\n 2.1831, 2.0605, 1.9415, 1.8257, 1.7132, 1.6036, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.7685, 2.0000, 1.8970, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142,\n 1.3234, 1.2344, 1.1471, 1.0613, 0.9771, 0.8944, 0.8131, 0.7332, 0.6547,\n 0.5774, 0.7877, 0.7107, 0.6348, 0.5601, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.4027, 0.6000, 0.7947, 0.7237, 0.9152, 1.1043, 1.0328, 0.9623,\n 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385, 0.6732, 0.6086,\n 0.5447, 0.4815, 0.6586, 0.8337, 0.7701, 0.7071, 0.6448, 0.5832, 0.7543,\n 0.9238, 0.8617, 0.8003, 0.7395, 0.9058, 1.0705, 1.0094, 0.9488, 0.8889,\n 0.8295, 0.9909, 0.9316, 0.8729, 1.0319, 0.9733, 0.9152, 1.0721, 1.2276,\n 1.1693, 1.1114, 1.0541, 0.9972, 1.1500, 1.0932, 1.2443, 1.1877, 1.1316,\n 1.0759, 1.0206, 1.1692, 1.1140, 1.0593, 1.0050, 0.9512, 0.8978, 1.0435,\n 0.9901, 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.5203, 0.4714, 0.4229, 0.5620, 0.7001,\n 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651, 0.3185, 0.2722,\n 0.2261, 0.1803, 0.3146, 0.2689, 0.2234, 0.1782, 0.1332, 0.2657, 0.2208,\n 0.1761, 0.1317, 0.0875, 0.0436, 0.0000, 0.1302, 0.0865, 0.0432, 0.1721,\n 0.3004, 0.2568, 0.2134, 0.1703, 0.2971, 0.2540, 0.2111, 0.1684, 0.2940,\n 0.2513, 0.2089, 0.1667, 0.1247, 0.2487, 0.2067, 0.1650, 0.1234, 0.0821,\n 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 3.2998, 2.8368, 2.4495, 2.8868,\n 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.4096, 4.6775, 4.9358, 4.7140, 4.5033, 4.7556, 4.5556,\n 4.3644, 4.6101, 4.4272, 4.6663, 4.4907, 4.7237, 4.9507, 4.7819, 4.6188,\n 4.8407, 4.6829, 4.5301, 4.7469, 4.9592, 5.1671, 5.3708, 5.5705, 5.7664,\n 5.6183, 5.4740, 5.3333, 5.5261, 5.7155, 5.9017, 5.7646, 5.6307, 5.8140,\n 5.6830, 5.5549, 5.4295, 5.3067, 5.4870, 5.3666, 5.2485, 5.4259, 5.3100,\n 5.1962, 5.3709, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 6.0982, 5.9876,\n 5.8789, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550, 6.6075,\n 6.5033, 6.4006, 6.5514, 6.4501, 6.5993, 6.4993, 6.6469, 6.7931, 6.9378,\n 7.0812, 6.9824, 7.1243, 7.0268, 6.9305, 7.0711, 7.2104, 7.3485, 7.2532,\n 7.3901, 7.5258, 7.4316, 7.5661, 7.4730, 7.3810, 7.5143, 7.4233, 7.5556,\n 7.6867, 7.8168, 7.7268, 7.8558, 7.7667, 7.6785, 7.8065, 7.7192, 7.8463,\n 7.9724, 7.8859, 7.8003, 7.9254, 8.0497, 8.1731, 8.0882, 8.0042, 7.9209,\n 7.8384, 7.9608, 7.8791, 7.7981, 7.9196, 7.8393, 7.9600, 8.0798, 8.1989,\n 8.1192, 8.2375, 8.1585, 8.0801, 8.1976, 8.1198, 8.2365, 8.3525, 8.2754,\n 8.1988, 8.3140, 8.4286, 8.5424, 8.4664, 8.3910, 8.3162, 8.2420, 8.3550,\n 8.2813, 8.2082, 8.3205, 8.2479, 8.3595, 8.4706, 8.5810, 8.5088, 8.6186,\n 8.5469, 8.4757, 8.5848, 8.5141, 8.6226, 8.7305, 8.6603, 8.5905, 8.6978,\n 8.8045, 8.9107, 8.8413, 8.7724, 8.8780, 8.8094, 8.9145, 8.8464, 8.7788,\n 8.8832, 8.8160, 8.9199, 9.0233, 9.1262, 9.0593, 9.1617, 9.0952, 9.0292,\n 9.1310, 9.0653, 9.1667, 9.2676, 9.2022, 9.1372, 9.2376, 9.3375, 9.4370,\n 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: If the con artist has succeeded in fooling Sam, he would have gotten a lot of money.\nWith pronoun replaced: Sam would have gotten a lot of money.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -2.1320, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -2.9161, -2.9593, -3.0022, -3.0448, -3.0870, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -2.8889, -2.9309, -2.9726, -3.0140, -3.0551,\n -2.8786, -2.9200, -2.7457, -2.7875, -2.8289, -2.6575, -2.6992, -2.7406,\n -2.7818, -2.8226, -2.6550, -2.6961, -2.5303, -2.5717, -2.4079, -2.4495,\n -2.4908, -2.3293, -2.3708, -2.4121, -2.4531, -2.4938, -2.3354, -2.3764,\n -2.4170, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.5412, -2.5802, -2.6190, -2.4678, -2.5068, -2.5456, -2.5841, -2.6224,\n -2.4738, -2.3262, -2.3651, -2.2188, -2.0735, -2.1128, -2.1520, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.2517, -2.2892, -2.3264, -2.1884, -2.2258, -2.2630,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.2133,\n -2.2497, -2.2860, -2.3221, -2.3580, -2.2258, -2.2618, -2.1306, -2.1667,\n -2.2026, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, 0.3651, 0.8704, 1.3333, 1.7614, 1.5430, 1.9379, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.0656, 2.3938, 2.7080, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.1623, 3.0072, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.6943, 2.9424, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.0290, 2.9055, 3.1334, 3.0123, 2.8943, 3.1160, 3.3333,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.4101, 3.2998, 3.5032, 3.3947,\n 3.2883, 3.4873, 3.6831, 3.8759, 4.0657, 3.9595, 3.8552, 3.7528,\n 3.6522, 3.8376, 3.7383, 3.6407, 3.8228, 4.0024, 4.1797, 4.3546,\n 4.2571, 4.1612, 4.0667, 3.9736, 4.1451, 4.0531, 3.9624, 4.1312,\n 4.2981, 4.4630, 4.6262, 4.7875, 4.9472, 5.1051, 5.0138, 5.1698,\n 5.3243, 5.4772, 5.6286, 5.7785, 5.9270, 6.0740, 6.2197, 6.3640,\n 6.5069, 6.6486, 6.5569, 6.6973, 6.8364, 6.9743, 7.1110, 7.2466,\n 7.1556, 7.2900, 7.4233, 7.5556, 7.6867, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.2372, 8.3625, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.9178, 9.0370, 9.1553, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.4474, 9.5620, 9.6758, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.1494, 10.2592, 10.3683, 10.2837, 10.3923,\n 10.5002, 10.6076, 10.7143, 10.8204, 10.9259, 11.0309, 11.1352, 11.2390,\n 11.3423, 11.2589, 11.3616, 11.4638, 11.3812, 11.4829, 11.5841, 11.6847,\n 11.7849, 11.8846, 11.9837, 12.0824, 12.1805, 12.2782, 12.3754, 12.2940,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.9621, 13.0558,\n 13.1491, 13.2419, 13.3343, 13.4263, 13.5179, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.8007, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.5080, 14.5948, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The lawyer asked the witness a question, but he was reluctant to repeat it.\nWith pronoun replaced: The lawyer was reluctant to repeat it.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "32", + "Fraction of T in Greenlist": "16.1%", + "z-score": "-2.91", + "p value": "0.998", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.9149, -2.0000, -2.0817, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.7457, -1.8226, -1.4757, -1.5554, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.8257,\n -1.8935, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.4021, -1.4697, -1.5361, -1.6013, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -1.9052,\n -1.9596, -2.0135, -1.8000, -1.8543, -1.9081, -1.9612, -2.0137, -2.0656,\n -1.8604, -1.9127, -1.9645, -2.0158, -2.0665, -2.1167, -1.9189, -1.9695,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.5322, -2.3564, -2.4004,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.4449, -2.4875, -2.5298,\n -2.5718, -2.6135, -2.4467, -2.4887, -2.5303, -2.5717, -2.6128, -2.6536,\n -2.6941, -2.7344, -2.5726, -2.4121, -2.4531, -2.4938, -2.5342, -2.5744,\n -2.6143, -2.4574, -2.4975, -2.5373, -2.5769, -2.6163, -2.6554, -2.6943,\n -2.7329, -2.7713, -2.8095, -2.8475, -2.8852, -2.9227, -2.9600, -2.8098,\n -2.6605, -2.6984, -2.5506, -2.5886, -2.6264, -2.6640, -2.7014, -2.7386,\n -2.7756, -2.8124, -2.8490, -2.8853, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.9581, -2.9935, -2.8532, -2.7137, -2.7495, -2.7852,\n -2.8208, -2.8561, -2.8913, -2.7541, -2.7894, -2.8245, -2.8595, -2.8943,\n -2.9289, -2.9633, -2.9976, -3.0317, -3.0657, -3.0995, -3.1332, -3.1667,\n -3.2000, -3.0674, -2.9355, -2.9692, -2.8383, -2.8721, -2.9058])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "138", + "# Tokens in Greenlist": "58", + "Fraction of T in Greenlist": "42.0%", + "z-score": "4.62", + "p value": "1.92e-06", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.8257, -1.2185, -1.3333, -0.8006, -0.3086, -0.4472, 0.0000,\n 0.4201, 0.2722, 0.6623, 1.0328, 0.8819, 0.7385, 1.0835, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 1.1793, 1.0541, 1.3480, 1.6330,\n 1.5076, 1.7823, 1.6590, 1.5396, 1.4237, 1.6859, 1.9415, 1.8257,\n 2.0738, 2.3163, 2.5533, 2.4371, 2.3238, 2.2133, 2.4422, 2.3333,\n 2.2269, 2.1229, 2.3445, 2.5621, 2.4585, 2.6713, 2.8804, 2.7775,\n 2.6765, 2.5775, 2.7811, 2.6833, 2.5873, 2.4930, 2.6914, 2.8868,\n 2.7928, 2.9848, 3.1741, 3.3607, 3.2667, 3.1743, 3.0833, 3.2660,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.3556, 3.2686, 3.4427, 3.6148,\n 3.5283, 3.4429, 3.3587, 3.5277, 3.4442, 3.3619, 3.2806, 3.4466,\n 3.6107, 3.5298, 3.6919, 3.8523, 4.0112, 3.9302, 3.8503, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.6950, 3.8490, 4.0016, 3.9253, 4.0762,\n 4.2258, 4.1497, 4.0745, 4.0000, 4.1475, 4.0736, 4.0004, 3.9279,\n 4.0734, 4.2178, 4.1455, 4.2885, 4.4302, 4.3583, 4.2870, 4.2164,\n 4.3564, 4.2862, 4.2167, 4.1478, 4.2861, 4.4234, 4.3547, 4.4907,\n 4.6258, 4.5573, 4.4895, 4.4222, 4.5557, 4.4888, 4.4224, 4.3566,\n 4.4887, 4.6198])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make fewer of them.\nWith pronoun replaced: We should make fewer of the oatmeal cookies.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "3", + "Fraction of T in Greenlist": "33.3%", + "z-score": "0.577", + "p value": "0.282", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.5650, 1.8856,\n 1.7321, 2.0381, 2.3333, 2.6186, 2.4659, 2.7406, 2.5924, 2.8577,\n 3.1156, 2.9704, 2.8301, 3.0792, 2.9424, 3.1844, 3.4207, 3.6515,\n 3.5165, 3.7417, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.3618, 4.5569, 4.4374, 4.6291,\n 4.5118, 4.7002, 4.8857, 5.0684, 4.9528, 5.1326, 5.0190, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.5500, 5.7155,\n 5.6086, 5.7719, 5.9333, 6.0928, 5.9874, 6.1450, 6.0410, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.5514, 6.4501, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 6.9305, 7.0711,\n 6.9759, 7.1152, 7.2532, 7.3901, 7.2960, 7.4316, 7.3386, 7.4730,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.6867, 7.8168, 7.7268, 7.8558,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.0598, 8.1850, 8.0976, 8.2219,\n 8.1354, 8.2588, 8.3813, 8.5030, 8.4173, 8.5381, 8.4532, 8.5732,\n 8.6924, 8.8108, 8.7267, 8.8443, 8.9612, 9.0773, 8.9940, 9.1094,\n 9.0267, 9.1414, 9.2554, 9.1735, 9.0923, 9.2055, 9.1250, 9.2376,\n 9.3495, 9.4608, 9.3810, 9.4916, 9.4124, 9.5224, 9.6317, 9.5532,\n 9.4752, 9.5840, 9.5066, 9.6148, 9.7224, 9.8293, 9.7526, 9.8590,\n 9.7828, 9.8887, 9.9940, 9.9184, 9.8433, 9.9481, 9.8736, 9.9778,\n 10.0814, 10.1846, 10.1106, 10.2132, 10.1398, 10.2419, 10.3435, 10.2706,\n 10.1981, 10.2993, 10.2273, 10.3280, 10.4281, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.5833, 10.6817, 10.6111, 10.5410, 10.6389, 10.5692, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.8872, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bob collapsed on the sidewalk. Soon he saw Carl coming to help. He was very ill.\nWith pronoun replaced: Carl was very ill.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -1.2472, -0.9685, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.3965, -0.4714, -0.5449, -0.6172,\n -0.6882, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.3475, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.2993, -0.3573, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.8296, -0.8779, -0.7201, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.7566, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.8847, -0.7346, -0.7807, -0.8266, -0.6783, -0.5311, -0.3849,\n -0.4315, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.4652, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.5222,\n -0.5642, -0.6058, -0.4747, -0.3443, -0.3862, -0.4280, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.5864, -0.6266, -0.6667,\n -0.7065, -0.7461, -0.7856, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "102", + "Fraction of T in Greenlist": "51.3%", + "z-score": "8.55", + "p value": "5.96e-18", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 3.6108, 3.8490, 3.7017, 3.9337, 3.7905, 3.6515,\n 3.5165, 3.3853, 3.6098, 3.8297, 3.7009, 3.9158, 4.1265, 4.3333,\n 4.2064, 4.0825, 3.9614, 3.8431, 3.7273, 3.6141, 3.8146, 4.0119,\n 4.2060, 4.0937, 4.2844, 4.4721, 4.6571, 4.5461, 4.7281, 4.9075,\n 5.0844, 4.9747, 5.1490, 5.3211, 5.4909, 5.3825, 5.2760, 5.4433,\n 5.3383, 5.5035, 5.4000, 5.2981, 5.1978, 5.0990, 5.2615, 5.4222,\n 5.3245, 5.4832, 5.6401, 5.7955, 5.6986, 5.6032, 5.5090, 5.4160,\n 5.3243, 5.2338, 5.3865, 5.5377, 5.6875, 5.5976, 5.7458, 5.8926,\n 6.0380, 5.9488, 5.8606, 6.0044, 5.9172, 6.0596, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.8605, 6.0000, 5.9171, 6.0553, 6.1924, 6.3283,\n 6.2459, 6.3807, 6.5144, 6.6471, 6.5653, 6.4842, 6.4040, 6.3246,\n 6.2459, 6.1680, 6.2990, 6.4291, 6.5583, 6.4807, 6.6089, 6.7361,\n 6.8624, 6.7854, 6.7090, 6.8343, 6.7585, 6.8828, 6.8076, 6.7330,\n 6.6591, 6.5857, 6.7089, 6.8313, 6.7584, 6.8799, 7.0007, 7.1207,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.3312, 7.2596, 7.3765, 7.3054,\n 7.4215, 7.3508, 7.2807, 7.2111, 7.1420, 7.2572, 7.3717, 7.3030,\n 7.4168, 7.5299, 7.6424, 7.5740, 7.5061, 7.4386, 7.3717, 7.3051,\n 7.2391, 7.3506, 7.4615, 7.5719, 7.5061, 7.6158, 7.7249, 7.8335,\n 7.7679, 7.7028, 7.8107, 7.7460, 7.8533, 7.7889, 7.7249, 7.6613,\n 7.5981, 7.7047, 7.8107, 7.7478, 7.8533, 7.9582, 8.0627, 8.0000,\n 8.1039, 8.2074, 8.3103, 8.2479, 8.3503, 8.4523, 8.5538])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mr. Moncrieff visited Chester's luxurious New York apartment, thinking that it belonged to his son Edward. The result was that Mr. Moncrieff has decided to cancel Edward's allowance on the ground that he no longer requires his financial support.\nWith pronoun replaced: He no longer requires Chester's financial support.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "64", + "Fraction of T in Greenlist": "32.2%", + "z-score": "2.33", + "p value": "0.00983", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 1.0328, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.4364, 0.7505, 0.6325, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.3849, 0.2847, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.1466, 0.0727, 0.2887,\n 0.2148, 0.4264, 0.3527, 0.5601, 0.4865, 0.4140, 0.3426, 0.2722,\n 0.2027, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.2520, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.9864, 0.9238, 0.8617, 1.0290, 1.1946, 1.3587,\n 1.2959, 1.2337, 1.1721, 1.3333, 1.4931, 1.4313, 1.3700, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.4412, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.5681, 1.5097, 1.4517, 1.6008, 1.7488, 1.8956, 1.8371,\n 1.7792, 1.7217, 1.6646, 1.6081, 1.7522, 1.6958, 1.6398, 1.7823,\n 1.7264, 1.6710, 1.6160, 1.5614, 1.5073, 1.6473, 1.7864, 1.7321,\n 1.6781, 1.8157, 1.9524, 1.8983, 1.8446, 1.7913, 1.9263, 1.8732,\n 1.8204, 1.7679, 1.9013, 1.8490, 1.7970, 1.9291, 2.0604, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.8033, 1.9327, 2.0613, 2.0105, 1.9599,\n 1.9097, 2.0369, 2.1634, 2.1131, 2.0631, 2.0134, 1.9640, 1.9149,\n 1.8660, 1.9906, 2.1145, 2.0656, 2.0170, 2.1398, 2.0913, 2.2133,\n 2.1648, 2.1167, 2.2377, 2.3580, 2.4778, 2.4294, 2.3812, 2.3333,\n 2.2857, 2.2384, 2.3567, 2.3094, 2.2624, 2.3798, 2.3329])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "82", + "Fraction of T in Greenlist": "41.2%", + "z-score": "5.28", + "p value": "6.47e-08", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321, 1.5403, 1.9052,\n 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142, 1.2702, 1.5852, 1.8889,\n 2.1822, 2.0370, 1.8974, 2.1776, 2.4495, 2.7136, 2.9704, 3.2205, 3.4641,\n 3.3221, 3.5590, 3.4207, 3.6515, 3.8772, 4.0980, 3.9620, 3.8297, 3.7009,\n 3.9158, 4.1265, 4.0000, 3.8765, 3.7559, 3.6380, 3.5228, 3.4101, 3.6141,\n 3.8146, 3.7033, 3.5942, 3.7905, 3.9837, 4.1740, 4.0657, 3.9595, 4.1461,\n 4.3301, 4.2251, 4.1219, 4.0205, 3.9208, 3.8228, 3.7264, 3.9056, 4.0825,\n 4.2571, 4.1612, 4.0667, 3.9736, 4.1451, 4.3146, 4.2222, 4.1312, 4.0415,\n 3.9530, 3.8657, 3.7796, 3.9452, 4.1090, 4.0234, 3.9389, 4.1003, 4.2601,\n 4.4182, 4.3339, 4.2507, 4.4066, 4.5611, 4.4783, 4.3966, 4.3158, 4.2359,\n 4.1569, 4.0788, 4.2303, 4.3804, 4.5291, 4.4511, 4.3740, 4.2977, 4.4444,\n 4.5899, 4.5140, 4.4388, 4.3644, 4.2907, 4.2178, 4.1455, 4.2885, 4.4302,\n 4.3583, 4.2870, 4.4272, 4.5663, 4.7044, 4.6332, 4.5626, 4.6992, 4.8348,\n 4.7645, 4.6949, 4.6258, 4.5573, 4.4895, 4.4222, 4.5557, 4.6883, 4.8200,\n 4.7527, 4.6860, 4.6198, 4.7501, 4.8795, 4.8135, 4.7481, 4.6832, 4.6188,\n 4.5549, 4.4915, 4.6190, 4.7458, 4.6825, 4.6198, 4.7454, 4.8702, 4.9943,\n 4.9316, 4.8693, 4.9923, 5.1146, 5.0525, 4.9908, 4.9295, 4.8687, 4.8083,\n 4.7483, 4.8690, 4.9891, 5.1085, 5.0485, 4.9889, 4.9297, 5.0480, 5.1657,\n 5.1066, 5.0479, 4.9896, 4.9317, 4.8742, 4.8170, 4.9333, 5.0489, 4.9918,\n 4.9351, 5.0499, 5.1642, 5.2778, 5.2211, 5.1647, 5.2775, 5.3898, 5.3335,\n 5.2776, 5.2220, 5.1667, 5.1117, 5.0571, 5.1681, 5.2786, 5.3886, 5.3340,\n 5.2796])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Tatyana knew that Grandma always enjoyed serving an abundance of food to her guests. Now Tatyana watched as Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled her to the table, lifting her shawl from her shoulders, seating her in the place of honor, and saying simply: \"There's plenty.\"\nWith pronoun replaced: Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled Tatyana to the table.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.6445, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.5922, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.1783, -1.2247,\n -1.2708, -1.1140, -1.1602, -1.2060, -1.2516, -1.2968, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.1263, -1.1711, -1.2155, -1.0659, -1.1105, -1.1547,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.2257, -1.2686, -1.3112,\n -1.3536, -1.3957, -1.4376, -1.4792, -1.5206, -1.5617, -1.4194, -1.4606,\n -1.3195, -1.3608, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.3284, -1.3687, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.4162, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.6087, -1.6466, -1.6843, -1.7218, -1.7592, -1.7964, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.9640, 2.4495, 2.8868,\n 3.2863, 2.9593, 3.3333, 3.6829, 4.0119, 3.7268, 4.0415, 3.7808, 3.5382,\n 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 4.2426, 4.5033, 4.7556, 4.5556,\n 4.3644, 4.1812, 4.0056, 3.8367, 4.0825, 3.9196, 3.7626, 3.6108, 3.4641,\n 3.3221, 3.1844, 3.0509, 3.2863, 3.1558, 3.0290, 2.9055, 2.7852, 3.0123,\n 2.8943, 2.7791, 2.6667, 2.5568, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.2576, 2.1602, 2.0647, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.6914,\n 2.8868, 2.7928, 2.7005, 2.6098, 2.5205, 2.4327, 2.3462, 2.2611, 2.4495,\n 2.6354, 2.5504, 2.4667, 2.3842, 2.3028, 2.2226, 2.1436, 2.0656, 2.2453,\n 2.4228, 2.3448, 2.2678, 2.1918, 2.1167, 2.2902, 2.2156, 2.3868, 2.3126,\n 2.2393, 2.1669, 2.0954, 2.0247, 1.9548, 1.8856, 1.8173, 1.9829, 1.9149,\n 1.8475, 2.0107, 1.9437, 1.8773, 1.8116, 1.7467, 1.9066, 1.8419, 1.7778,\n 1.7143, 1.6514, 1.8084, 1.9640, 2.1182, 2.0548, 1.9920, 1.9298, 1.8682,\n 1.8071, 1.7465, 1.6865, 1.8370, 1.7772, 1.7179, 1.6591, 1.8074, 1.7488,\n 1.6906, 1.6330, 1.5758, 1.5191, 1.4629, 1.4071, 1.3517, 1.2968, 1.2423,\n 1.1882, 1.1345, 1.2778, 1.2243, 1.1711, 1.3128, 1.2597, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.2257, 1.1746, 1.1239, 1.2603,\n 1.2096, 1.1593, 1.1094, 1.0598, 1.1942, 1.1447, 1.0954, 1.0465, 0.9979,\n 0.9497, 1.0820, 1.0338, 0.9858, 0.9382, 0.8909, 0.8438, 0.7971, 0.9272,\n 0.8805, 0.8340, 0.9629, 1.0911, 1.2185, 1.3453, 1.2982, 1.2514, 1.2049,\n 1.1587, 1.1127, 1.0670, 1.0215, 1.1461, 1.2700, 1.2244, 1.1790, 1.1339,\n 1.0890, 1.0444, 1.0000, 0.9558, 1.0777, 1.1990, 1.1547, 1.1106, 1.0668,\n 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Ann asked Mary what time the library closes, because she had forgotten.\nWith pronoun replaced: Mary had forgotten.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "85", + "# Tokens in Greenlist": "24", + "Fraction of T in Greenlist": "28.2%", + "z-score": "0.689", + "p value": "0.245", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n 0.3464, 0.6794, 1.0000, 0.8729, 0.7505, 0.6325, 0.9333, 1.2247,\n 1.1055, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.5345, 0.4402, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.2981, 0.5175, 0.4399, 0.3637, 0.5774,\n 0.7877, 0.7107, 0.6348, 0.5601, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.8238, 0.7559, 0.6888])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "123", + "Fraction of T in Greenlist": "61.8%", + "z-score": "12", + "p value": "1.96e-33", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 3.6556, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 4.4543, 4.7140,\n 4.9652, 4.7556, 5.0000, 5.2372, 5.4678, 5.2705, 5.0811, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.2697, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.3687, 6.5485, 6.7254, 6.5823, 6.7568, 6.9286, 7.0980,\n 6.9589, 6.8229, 6.6896, 6.5591, 6.7269, 6.8924, 7.0557, 6.9282,\n 7.0895, 7.2488, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.6512, 7.8000, 7.9472, 7.8296, 7.9754, 7.8598, 8.0042,\n 8.1471, 8.0335, 8.1750, 8.3152, 8.4540, 8.3425, 8.2325, 8.3702,\n 8.5067, 8.3984, 8.5337, 8.6678, 8.5612, 8.6942, 8.5891, 8.7210,\n 8.8518, 8.9815, 9.1101, 9.2376, 9.3641, 9.4896, 9.6141, 9.5111,\n 9.6348, 9.7574, 9.8792, 9.7778, 9.6775, 9.5784, 9.6995, 9.8198,\n 9.9392, 10.0577, 10.1754, 10.0779, 10.1948, 10.3110, 10.4263, 10.3301,\n 10.2348, 10.1405, 10.2554, 10.3695, 10.4829, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.8363, 10.9462, 11.0554, 10.9637, 11.0724, 10.9816, 10.8916,\n 10.9998, 10.9107, 11.0183, 10.9301, 11.0371, 10.9497, 11.0562, 11.1621,\n 11.0756, 11.1810, 11.0952, 11.0102, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.9689, 10.8867, 10.8051, 10.7242, 10.6439, 10.5642, 10.4852, 10.5893,\n 10.6929, 10.6145, 10.7175, 10.8200, 10.7423, 10.8443, 10.7671, 10.8686,\n 10.9697, 10.8931, 10.9936, 11.0937, 11.1933, 11.1173, 11.0418, 11.1410,\n 11.2396, 11.1648, 11.2630, 11.3608, 11.2864, 11.3837, 11.3099, 11.4068,\n 11.5033, 11.5993, 11.6949, 11.7901, 11.8849, 11.8117, 11.9060, 12.0000,\n 12.0935, 12.1867, 12.1141, 12.0419, 12.1347, 12.0630, 11.9917])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: George got free tickets to the play, but he gave them to Eric, even though he was particularly eager to see it.\nWith pronoun replaced: Eric was particularly eager to see it.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.9%", + "z-score": "-0.37", + "p value": "0.644", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, -0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.9467, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 0.8724, 0.7857, 0.7006, 0.9258,\n 0.8412, 0.7581, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.0658, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.4977, -0.3303, -0.3836, -0.4364,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.2669, -0.3189, -0.1588, 0.0000,\n -0.0525, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.2503, -0.2993, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.5990, -0.6430, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.6783, -0.7213, -0.5843, -0.4481, -0.4914, -0.5345,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.3951, -0.4377, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.2924, -0.3333,\n -0.3740, -0.4145, -0.4548, -0.4949, -0.3702])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "179", + "Fraction of T in Greenlist": "89.9%", + "z-score": "21.2", + "p value": "1.13e-99", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 3.5382, 3.3113, 3.0984, 2.8977, 3.2004, 3.4912, 3.7712,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.6101, 4.4272, 4.6663, 4.8990,\n 5.1257, 4.9507, 5.1723, 5.3886, 5.6000, 5.8068, 5.6395, 5.8424,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.6398, 6.8214, 6.6667,\n 6.8457, 7.0219, 7.1952, 7.0456, 7.2168, 7.3853, 7.5514, 7.7152,\n 7.5707, 7.7326, 7.8923, 8.0498, 8.2054, 8.0656, 8.2195, 8.3716,\n 8.5218, 8.6702, 8.8168, 8.9618, 8.8271, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.5304, 9.6667, 9.8015, 9.9351, 10.0673, 10.1982, 10.3280,\n 10.4565, 10.5838, 10.7099, 10.8350, 10.9589, 11.0818, 11.2036, 11.3244,\n 11.4442, 11.5630, 11.6809, 11.7978, 11.9138, 12.0289, 12.1432, 12.2565,\n 12.3690, 12.4807, 12.5916, 12.7017, 12.8110, 12.9196, 13.0274, 13.1344,\n 13.2408, 13.3464, 13.4513, 13.5556, 13.6591, 13.7620, 13.8643, 13.9659,\n 14.0669, 14.1673, 14.2671, 14.3663, 14.4649, 14.5629, 14.6604, 14.7573,\n 14.8536, 14.9495, 15.0447, 15.1395, 15.2337, 15.3275, 15.4207, 15.5134,\n 15.6057, 15.6975, 15.7888, 15.8796, 15.9700, 16.0599, 16.1494, 16.2384,\n 16.3270, 16.4152, 16.5030, 16.5903, 16.6772, 16.7638, 16.8499, 16.9356,\n 17.0209, 17.1059, 17.1905, 17.2747, 17.3585, 17.4420, 17.5251, 17.6078,\n 17.6902, 17.7722, 17.8539, 17.9353, 18.0163, 18.0970, 18.1774, 18.2574,\n 18.3371, 18.4165, 18.4956, 18.5744, 18.6529, 18.7310, 18.8089, 18.8865,\n 18.9637, 19.0407, 19.1174, 19.1938, 19.2700, 19.3458, 19.4214, 19.4967,\n 19.5717, 19.6465, 19.7210, 19.7952, 19.8692, 19.9430, 20.0164, 20.0897,\n 20.1626, 20.2354, 20.3078, 20.3801, 20.4521, 20.5238, 20.5954, 20.6667,\n 20.7377, 20.8086, 20.8792, 20.9496, 21.0197, 21.0897, 21.1594])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The delivery truck zoomed by the school bus because it was going so slow.\nWith pronoun replaced: The school bus was going so slow.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.4364, 0.3216, 0.2108, 0.5185, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.5922, -0.6537, -0.4547, -0.5164,\n -0.5774, -0.3825, -0.1901, -0.2520, -0.0626, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.2222, -0.2765, -0.3303, -0.3836, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.3904, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, -0.0956, 0.0476, 0.0000, -0.0473, -0.0943, -0.1410, 0.0000,\n -0.0467, 0.0930, 0.0464, 0.0000, -0.0461, -0.0919, -0.1374, -0.1826,\n -0.2275, -0.2722, -0.3166, -0.3607, -0.4045, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.1325, -0.1761, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.2596, -0.3021, -0.3443, -0.3862, -0.2568, -0.2988, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.3369, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.5347, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 1.1547,\n 1.5403, 1.9052, 2.2517, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.2660,\n 3.1156, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.3818,\n 4.2378, 4.4544, 4.3142, 4.1779, 4.3894, 4.5968, 4.4634, 4.6667,\n 4.8662, 5.0623, 4.9316, 5.1241, 4.9962, 4.8712, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.0469, 6.2106, 6.3723, 6.5320,\n 6.4195, 6.5773, 6.4667, 6.3578, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.6075, 6.7583, 6.6541, 6.8034, 6.7006, 6.5993, 6.4993, 6.6469,\n 6.5483, 6.6944, 6.8391, 6.9824, 6.8849, 7.0268, 6.9305, 6.8354,\n 6.7414, 6.8819, 6.7890, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.1556, 7.0657, 6.9768, 7.1111, 7.0231, 6.9361, 6.8500, 6.9830,\n 7.1149, 7.2459, 7.1605, 7.2904, 7.2058, 7.1220, 7.0391, 7.1678,\n 7.0857, 7.2134, 7.3402, 7.4661, 7.3845, 7.5094, 7.4286, 7.3485,\n 7.2691, 7.3930, 7.3143, 7.4373, 7.5595, 7.6808, 7.6026, 7.7232,\n 7.6456, 7.5687, 7.4924, 7.6120, 7.7308, 7.6551, 7.5800, 7.6980,\n 7.8153, 7.9318, 7.8571, 7.9729, 8.0880, 8.2024, 8.1282, 8.2420,\n 8.3550, 8.2813, 8.3937, 8.5054, 8.6165, 8.5433, 8.6537, 8.7636,\n 8.8728, 8.8000, 8.7278, 8.8364, 8.7646, 8.8726, 8.9800, 9.0869,\n 9.0155, 9.1218, 9.2276, 9.3328, 9.2619, 9.3665, 9.4707, 9.4002,\n 9.3302, 9.2607, 9.3642, 9.4673, 9.5698, 9.5007, 9.6028, 9.7043,\n 9.8054, 9.7367, 9.8373, 9.9374, 9.8691, 9.9687, 10.0679, 10.1667,\n 10.0987, 10.1970, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Jane gave Joan candy because she was hungry.\nWith pronoun replaced: Jane was hungry.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "67", + "Fraction of T in Greenlist": "33.7%", + "z-score": "2.82", + "p value": "0.00237", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -1.2309, -0.8427, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -0.6831, -0.7698, -0.8542, -0.5620, -0.2774, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, 0.1633, 0.4042, 0.3203, 0.5551, 0.4714, 0.3892, 0.6172,\n 0.8412, 0.7581, 0.6765, 0.8944, 1.1088, 1.3198, 1.2366, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.1202, 1.3206, 1.5181, 1.7130, 1.9052,\n 2.0948, 2.0135, 1.9333, 2.1193, 2.3028, 2.4841, 2.4034, 2.3238,\n 2.2453, 2.4228, 2.3448, 2.5198, 2.4423, 2.3658, 2.5378, 2.7080,\n 2.6316, 2.7995, 2.7235, 2.8893, 2.8138, 2.9775, 3.1394, 3.0641,\n 2.9897, 2.9161, 2.8433, 2.7713, 2.9299, 2.8583, 3.0151, 2.9439,\n 3.0989, 3.2525, 3.1814, 3.3333, 3.4839, 3.4130, 3.5620, 3.4915,\n 3.4217, 3.3526, 3.2841, 3.4308, 3.3627, 3.2953, 3.4402, 3.3731,\n 3.3066, 3.2408, 3.3838, 3.3182, 3.2533, 3.1889, 3.1251, 3.2660,\n 3.4058, 3.3420, 3.2788, 3.2161, 3.1539, 3.0923, 3.2299, 3.1685,\n 3.3049, 3.4403, 3.3789, 3.5132, 3.4521, 3.5853, 3.5245, 3.6566,\n 3.7878, 3.7270, 3.6667, 3.6068, 3.5474, 3.4884, 3.4298, 3.3717,\n 3.3140, 3.2567, 3.3853, 3.5131, 3.4558, 3.3989, 3.3424, 3.2863,\n 3.2306, 3.1753, 3.3012, 3.2460, 3.1912, 3.1368, 3.0827, 3.0290,\n 2.9756, 3.0997, 3.0464, 2.9935, 2.9410, 2.8887, 2.8368, 2.9593,\n 2.9076, 3.0292, 2.9776, 3.0984, 3.0469, 2.9957, 3.1156, 3.0645,\n 3.0138, 2.9633, 3.0821, 3.0317, 2.9817, 2.9320, 2.8825, 2.8333,\n 2.9507, 3.0674, 3.0182, 2.9692, 2.9205, 2.8721, 2.8240])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "98", + "Fraction of T in Greenlist": "49.2%", + "z-score": "7.9", + "p value": "1.41e-15", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.6943, 2.9424, 2.8098, 2.6811, 2.5560,\n 2.7952, 3.0290, 3.2577, 3.1334, 3.3566, 3.2348, 3.4528, 3.3333,\n 3.5466, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.5118, 4.7002, 4.8857, 4.7703, 4.9528, 5.1326, 5.3100, 5.1962,\n 5.0844, 5.2590, 5.4312, 5.3211, 5.4909, 5.3825, 5.2760, 5.1711,\n 5.3383, 5.2350, 5.1333, 5.0332, 4.9346, 5.0990, 5.0017, 5.1640,\n 5.0679, 4.9731, 5.1332, 5.2915, 5.1977, 5.3541, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 5.9270, 5.8358, 5.9827, 6.1283,\n 6.0380, 6.1820, 6.0927, 6.0044, 5.9172, 5.8310, 5.9732, 5.8878,\n 5.8034, 5.7199, 5.6373, 5.7778, 5.6959, 5.8351, 5.7540, 5.8919,\n 6.0287, 6.1644, 6.0837, 6.2183, 6.3517, 6.4842, 6.4040, 6.5354,\n 6.6658, 6.7952, 6.7155, 6.6365, 6.7648, 6.8922, 6.8138, 6.9402,\n 6.8624, 6.7854, 6.7090, 6.8343, 6.7585, 6.6833, 6.6088, 6.5350,\n 6.6591, 6.5857, 6.7089, 6.6361, 6.7584, 6.8799, 7.0007, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.2136, 7.3312, 7.4482, 7.5644, 7.4927,\n 7.4215, 7.5369, 7.6517, 7.5809, 7.6950, 7.6246, 7.5548, 7.4855,\n 7.4168, 7.5299, 7.4616, 7.3937, 7.3263, 7.2594, 7.3717, 7.3051,\n 7.4167, 7.3506, 7.2849, 7.3958, 7.5061, 7.4407, 7.5503, 7.6594,\n 7.7679, 7.7028, 7.8107, 7.9181, 8.0249, 7.9601, 7.8956, 8.0018,\n 8.1075, 8.0433, 8.1485, 8.0847, 8.0212, 7.9582, 8.0627, 8.0000,\n 7.9377, 7.8758, 7.8142, 7.9179, 7.8567, 7.9599, 7.8990])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mark heard Steve's feet going down the ladder. The door of the shop closed after him. He ran to look out the window.\nWith pronoun replaced: Mark ran to look out the window.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "51", + "Fraction of T in Greenlist": "25.6%", + "z-score": "0.205", + "p value": "0.419", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 1.3525, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, 0.0000, 0.2085, 0.1380, 0.3426, 0.5443,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.5164,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.3095, 0.2462,\n 0.1836, 0.3651, 0.3026, 0.2408, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.5717, 0.7395, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.4611, 0.6124,\n 0.7625, 0.7089, 0.8575, 1.0050, 1.1514, 1.0973, 1.0435, 0.9901,\n 1.1345, 1.0812, 1.0284, 0.9759, 0.9238, 1.0659, 1.0139, 0.9623,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.8485, 0.7987, 0.7493,\n 0.7001, 0.6513, 0.6029, 0.5547, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.1782,\n 0.3109, 0.4428, 0.3974, 0.5283, 0.6584, 0.7878, 0.7419, 0.6963,\n 0.6509, 0.7789, 0.7336, 0.6885, 0.6437, 0.5991, 0.7255, 0.6810,\n 0.6367, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.5431, 0.5000,\n 0.4571, 0.4145, 0.3721, 0.3299, 0.2879, 0.2462, 0.2046])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.9640, 1.6330,\n 1.3472, 1.0954, 0.8704, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.0835, 1.4142,\n 1.7321, 2.0381, 1.8889, 1.7457, 1.6082, 1.8974, 1.7628, 2.0412,\n 2.3116, 2.5744, 2.8301, 2.6943, 2.5627, 2.8098, 2.6811, 2.9212,\n 2.7952, 2.6726, 2.9055, 3.1334, 3.3566, 3.2348, 3.1160, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.0119,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.1461, 4.0415,\n 3.9386, 4.1219, 4.0205, 4.2008, 4.3788, 4.5544, 4.7278, 4.8990,\n 5.0680, 4.9666, 5.1333, 5.0332, 4.9346, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.2281, 5.1332, 5.0395, 5.1977, 5.1051, 5.2614, 5.4160,\n 5.3243, 5.4772, 5.6286, 5.7785, 5.6875, 5.8358, 5.7458, 5.8926,\n 6.0380, 5.9488, 5.8606, 5.7735, 5.9172, 5.8310, 5.9732, 6.1143,\n 6.2541, 6.3928, 6.3070, 6.2222, 6.3595, 6.4957, 6.4116, 6.5465,\n 6.4632, 6.5970, 6.7298, 6.6471, 6.5653, 6.4842, 6.6157, 6.5354,\n 6.6658, 6.7952, 6.9237, 7.0513, 7.1779, 7.3037, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.6383, 7.5595, 7.4813, 7.4039, 7.5251,\n 7.4483, 7.5687, 7.6883, 7.8072, 7.9253, 7.8489, 7.7732, 7.8905,\n 8.0070, 7.9318, 8.0476, 7.9729, 8.0880, 8.2024, 8.1282, 8.0546,\n 7.9816, 8.0952, 8.0227, 8.1356, 8.2479, 8.3595, 8.4706, 8.5810,\n 8.6908, 8.8000, 8.7278, 8.8364, 8.7646, 8.8726, 8.9800, 8.9087,\n 8.8379, 8.7676, 8.8744, 8.8045, 8.9107, 9.0164, 9.1215, 9.2261,\n 9.1566, 9.0876, 9.1916, 9.2952, 9.2265, 9.3295, 9.2613, 9.3638,\n 9.4658, 9.3980, 9.3306, 9.2637, 9.3651, 9.2986, 9.3995, 9.5000,\n 9.6000, 9.6996, 9.7987, 9.8974, 9.9957, 9.9294, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Although they ran at about the same speed, Sue beat Sally because she had such a good start.\nWith pronoun replaced: Sally had such a good start.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -0.7581, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.5832, -0.6383, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.5505, -0.3836, -0.2182,\n -0.2716, -0.3244, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.0983, -0.1469, 0.0000, -0.0486, 0.0969, 0.0483, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.0473, -0.0943, 0.0470, 0.0000,\n 0.1400, 0.2791, 0.2319, 0.1849, 0.1382, 0.2756, 0.2289, 0.3651,\n 0.3185, 0.4536, 0.4070, 0.3607, 0.3146, 0.2689, 0.4021, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.1317, 0.2626, 0.2182, 0.1741,\n 0.3038, 0.4327, 0.3884, 0.3443, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.4669, 0.5927, 0.5489, 0.5053, 0.4620, 0.4189, 0.3760, 0.3333,\n 0.4571, 0.5803, 0.5375, 0.4949, 0.4525, 0.5744, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.1004, 2.4495, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.4915, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.5176, 3.3665, 3.6108, 3.8490, 3.7017, 3.9337, 4.1603, 4.0166,\n 4.2378, 4.0980, 4.3142, 4.5260, 4.3894, 4.5968, 4.4634, 4.3333,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.1236, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.0469, 6.2106, 6.0982, 5.9876,\n 5.8789, 6.0404, 6.2000, 6.0928, 6.2505, 6.4065, 6.5607, 6.4550,\n 6.6075, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 6.9945, 7.1393,\n 7.2827, 7.4247, 7.5653, 7.4639, 7.6033, 7.7414, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.8520, 7.7555, 7.8889, 8.0212, 8.1524,\n 8.0571, 8.1873, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.9138, 9.0354, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.6566, 9.7725, 9.8877, 10.0021,\n 10.1157, 10.2287, 10.3409, 10.4524, 10.3630, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 10.9301, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.6667, 11.7696, 11.8719, 11.9737, 12.0749, 12.1756,\n 12.2758, 12.1893, 12.2891, 12.3883, 12.4870, 12.5852, 12.6830, 12.7802,\n 12.6949, 12.7918, 12.8881, 12.9840, 13.0795, 13.1745, 13.2690, 13.3631,\n 13.4567, 13.5499, 13.6427, 13.7350, 13.8270, 13.9185, 13.8350, 13.9262,\n 14.0170, 14.1074, 14.1974, 14.2870, 14.3762, 14.2939, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.7348, 14.8219, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.2520, 15.3370, 15.4217, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred is the only man still alive who remembers my great-grandfather. He is a remarkable man.\nWith pronoun replaced: Fred is a remarkable man.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, 0.0000,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.4714, -0.2335, -0.3086,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.2722,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.1217, 0.0605, 0.0000, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.6140, -0.6667, -0.7189, -0.5505, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.5922, -0.4288, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.5726, -0.6222, -0.6713, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.6983, -0.7454, -0.7921,\n -0.8385, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -0.8151, -0.8601, -0.9048, -0.7593, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -1.0038, -0.8704,\n -0.9113, -0.9520, -0.9925, -1.0328, -1.0729, -1.1127, -0.9816, -1.0215,\n -1.0612, -0.9313, -0.8022, -0.8422, -0.8819, -0.9215, -0.9608, -0.8333,\n -0.8727, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 1.0911, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.0000, 1.7614, 2.1602, 2.5342, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 3.4017, 3.6927, 3.9727, 3.7712,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.4907,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 5.0576, 5.2697, 5.1121,\n 4.9592, 4.8107, 4.6664, 4.5260, 4.3894, 4.2563, 4.4634, 4.6667,\n 4.8662, 4.7357, 4.9316, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.3067, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100, 5.4848,\n 5.6573, 5.5432, 5.7133, 5.8812, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.7242, 5.6220, 5.5213, 5.4222,\n 5.5811, 5.7382, 5.8936, 5.7955, 5.9491, 6.1012, 6.2517, 6.1546,\n 6.0587, 6.2075, 6.3549, 6.2601, 6.1664, 6.3122, 6.4566, 6.3640,\n 6.2725, 6.4153, 6.5569, 6.4663, 6.6066, 6.7456, 6.8834, 6.7937,\n 6.9303, 7.0657, 7.2001, 7.1111, 7.0231, 6.9361, 6.8500, 6.7648,\n 6.6804, 6.5970, 6.7298, 6.8615, 6.9923, 6.9094, 7.0391, 7.1678,\n 7.2956, 7.2134, 7.1319, 7.0513, 6.9714, 6.8922, 6.8138, 6.7361,\n 6.8624, 6.9879, 7.1125, 7.0353, 7.1590, 7.2818, 7.4039, 7.3271,\n 7.2510, 7.3721, 7.4924, 7.4168, 7.5364, 7.6551, 7.7732, 7.6980,\n 7.6235, 7.5495, 7.4762, 7.4034, 7.3312, 7.2596, 7.3765, 7.4927,\n 7.6082, 7.5369, 7.6517, 7.7658, 7.8793, 7.8084, 7.7380, 7.6681,\n 7.5988, 7.5299, 7.4616, 7.3937, 7.5061, 7.6179, 7.7291, 7.6615,\n 7.7720, 7.8820, 7.9913, 7.9241, 7.8572, 7.7908, 7.7249, 7.6594,\n 7.5944, 7.5297, 7.6381, 7.7460, 7.8533, 7.7889, 7.8956, 8.0018,\n 8.1075, 8.0433, 7.9796, 7.9162, 7.8533, 7.7907, 7.7285, 7.6667,\n 7.7715, 7.8758, 7.9796, 7.9179, 8.0212, 8.1240, 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.\nWith pronoun replaced: Larry could not help him now.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 1.0911, 1.6330,\n 1.3472, 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, 0.0000,\n -0.1155, 0.2265, 0.5556, 0.8729, 0.7505, 0.6325, 0.5185, 0.4082,\n 0.3015, 0.5941, 0.4880, 0.3849, 0.2847, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.3482, 0.2582, 0.1703, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.0778, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.2801, 0.2085, 0.1380, 0.3426, 0.2722,\n 0.4730, 0.6712, 0.8667, 0.7947, 0.7237, 0.6537, 0.8444, 0.7746,\n 0.9623, 0.8926, 0.8238, 0.7559, 0.6888, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.5447, 0.7223, 0.8980, 0.8337, 0.7701, 0.9428,\n 0.8793, 1.0498, 1.2185, 1.3856, 1.3213, 1.2577, 1.1946, 1.1323,\n 1.0705, 1.2337, 1.1721, 1.1111, 1.0507, 0.9909, 0.9316, 1.0911,\n 1.0319, 0.9733, 0.9152, 0.8577, 0.8006, 0.9567, 1.1114, 1.0541,\n 0.9972, 1.1500, 1.0932, 1.2443, 1.3943, 1.5430, 1.4857, 1.4289,\n 1.3725, 1.3166, 1.2611, 1.4071, 1.3517, 1.4963, 1.4410, 1.5842,\n 1.7264, 1.8676, 1.8119, 1.7566, 1.7018, 1.6473, 1.5933, 1.7321,\n 1.6781, 1.6246, 1.5714, 1.7085, 1.6554, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.5818, 1.7158, 1.6641, 1.6127, 1.7454, 1.6941, 1.8257,\n 1.9566, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.9599,\n 1.9097, 1.8598, 1.8102, 1.7609, 1.7119, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.5967, 1.7213, 1.8453, 1.7974, 1.7498, 1.8728,\n 1.8252, 1.9473, 2.0688, 2.1896, 2.1418, 2.0943, 2.0470, 2.0000,\n 1.9533, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "132", + "Fraction of T in Greenlist": "66.3%", + "z-score": "13.5", + "p value": "1.26e-41", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 2.8402, 2.3570, 2.8368, 3.2660,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.3409, 4.6268, 4.9008, 5.1640, 4.9135, 4.6775, 4.4543, 4.7140,\n 4.9652, 5.2085, 5.0000, 5.2372, 5.4678, 5.6921, 5.9106, 5.7155,\n 5.9297, 6.1389, 6.3434, 6.1584, 6.3594, 6.5561, 6.7489, 6.5727,\n 6.4019, 6.2361, 6.4273, 6.6150, 6.7992, 6.6398, 6.8214, 7.0000,\n 7.1756, 7.3485, 7.1952, 7.3659, 7.5340, 7.3853, 7.5514, 7.7152,\n 7.8766, 8.0358, 7.8923, 8.0498, 7.9097, 8.0656, 7.9286, 7.7942,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 8.1428, 8.2908, 8.1650,\n 8.3116, 8.1881, 8.0667, 7.9472, 8.0928, 8.2369, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.0179, 9.1493, 9.0401, 8.9324, 9.0629, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.6995, 9.5939, 9.7183, 9.8416, 9.9640,\n 10.0855, 10.2061, 10.1024, 10.2222, 10.1199, 10.2390, 10.3571, 10.4745,\n 10.5909, 10.7066, 10.8215, 10.7211, 10.8353, 10.9488, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.5048, 11.4065, 11.5157, 11.6242, 11.7320, 11.6351,\n 11.7424, 11.6465, 11.5515, 11.6584, 11.7647, 11.8704, 11.7766, 11.8818,\n 11.9863, 12.0902, 12.1936, 12.2963, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.4181, 12.5188, 12.4286, 12.3391, 12.2503, 12.3508, 12.2628, 12.3629,\n 12.2758, 12.3754, 12.2891, 12.3883, 12.4870, 12.4015, 12.3167, 12.4150,\n 12.5129, 12.4289, 12.3455, 12.4430, 12.5401, 12.6367, 12.7329, 12.8285,\n 12.9238, 13.0185, 13.1129, 13.2068, 13.1246, 13.2182, 13.1367, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.3585, 13.4499,\n 13.3710, 13.4620, 13.5526, 13.4744, 13.3967, 13.4871, 13.5771, 13.5000,\n 13.5897, 13.5131, 13.4371, 13.5265, 13.4510, 13.5401, 13.4651])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: George got free tickets to the play, but he gave them to Eric, because he was particularly eager to see it.\nWith pronoun replaced: Eric was particularly eager to see it.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.1155, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, 0.0000, -0.0949, 0.1873, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.4402, 0.6963, 0.6025, 0.8513, 1.0948, 1.0000,\n 0.9073, 0.8165, 0.7276, 0.9608, 1.1896, 1.0999, 1.0120, 0.9258,\n 0.8412, 1.0613, 1.2778, 1.1926, 1.1088, 1.0265, 1.2366, 1.4434,\n 1.3606, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.7130, 1.9052,\n 1.8245, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 2.0656,\n 2.2453, 2.1678, 2.0913, 2.0158, 1.9413, 2.1167, 2.2902, 2.2156,\n 2.1420, 2.0692, 1.9973, 2.1669, 2.3349, 2.2629, 2.1917, 2.1213,\n 2.0517, 1.9829, 1.9149, 2.0785, 2.2405, 2.1723, 2.1049, 2.0381,\n 2.1974, 2.3552, 2.2884, 2.4444, 2.5991, 2.5322, 2.4660, 2.4004,\n 2.3354, 2.4874, 2.6381, 2.5731, 2.5087, 2.4449, 2.3817, 2.5298,\n 2.6768, 2.6135, 2.5508, 2.4887, 2.6336, 2.7775, 2.7153, 2.8577,\n 2.9991, 2.9369, 2.8753, 2.8141, 2.7534, 2.6933, 2.6336, 2.5744,\n 2.5156, 2.4574, 2.3995, 2.3422, 2.2852, 2.2287, 2.1726, 2.1170,\n 2.0617, 2.0068, 1.9524, 1.8983, 1.8446, 1.7913, 1.7384, 1.6859,\n 1.6337, 1.7679, 1.9013, 1.8490, 1.9813, 1.9291, 1.8773, 2.0083,\n 2.1386, 2.0866, 2.0350, 1.9837, 1.9327, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.8598, 1.9868, 1.9370, 1.8875, 1.8383, 1.7894, 1.7408,\n 1.6925, 1.6444, 1.7693, 1.8935, 1.8453, 1.7974, 1.7498, 1.7025,\n 1.6555, 1.6087, 1.5621, 1.5159, 1.6378, 1.5916, 1.7128, 1.6667,\n 1.6208, 1.7410, 1.8605, 1.9795, 1.9333, 1.8874, 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "136", + "Fraction of T in Greenlist": "68.3%", + "z-score": "14.1", + "p value": "1.43e-45", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.7233, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.7778, 3.0551, 3.3235, 3.1623, 3.4219, 3.6742,\n 3.9196, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.7469,\n 4.9592, 4.8107, 5.0186, 4.8742, 5.0779, 5.2778, 5.1371, 5.0000,\n 4.8662, 5.0623, 4.9316, 5.1241, 5.3134, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.0125, 5.8919, 5.7735,\n 5.9438, 5.8275, 5.7133, 5.6011, 5.7689, 5.9346, 6.0982, 6.2598,\n 6.4195, 6.5773, 6.4667, 6.6227, 6.5137, 6.6679, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 6.9511, 6.8483, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.3233, 7.4639, 7.6033, 7.5032, 7.6413, 7.7782,\n 7.9138, 7.8150, 7.9495, 8.0829, 8.2151, 8.3463, 8.4763, 8.6053,\n 8.7333, 8.8602, 8.7629, 8.8889, 9.0139, 9.1380, 9.0419, 9.1652,\n 9.2874, 9.4088, 9.3140, 9.4346, 9.5543, 9.6732, 9.7912, 9.9085,\n 10.0249, 10.1405, 10.2554, 10.1621, 10.2763, 10.3898, 10.5025, 10.6145,\n 10.7257, 10.6338, 10.5427, 10.6534, 10.5632, 10.4738, 10.5841, 10.6936,\n 10.8025, 10.9107, 11.0183, 11.1253, 11.0371, 11.1435, 11.2493, 11.3546,\n 11.4592, 11.5632, 11.4762, 11.3899, 11.4935, 11.5966, 11.5111, 11.6137,\n 11.7157, 11.8172, 11.9181, 12.0185, 11.9341, 12.0341, 12.1335, 12.0499,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 12.9491, 13.0431, 13.1367, 13.0558,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.5292, 13.6201,\n 13.7106, 13.6313, 13.5526, 13.4744, 13.5647, 13.6546, 13.7442, 13.8333,\n 13.9221, 14.0106, 13.9332, 14.0214, 14.1091, 14.1966, 14.1199])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: They broadcast an announcement, but a subway came into the station and I couldn't hear over it.\nWith pronoun replaced: I couldn't hear the subway.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 2.3570, 1.9640, 2.4495, 2.1170,\n 2.5560, 2.2630, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321, 1.5403, 1.3608,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.0835, 0.9428, 0.8083, 1.1323, 1.4444,\n 1.3093, 1.1793, 1.0541, 0.9333, 0.8165, 0.7035, 0.5941, 0.4880, 0.7698,\n 1.0441, 1.3112, 1.2019, 1.4606, 1.3525, 1.2472, 1.1446, 1.3926, 1.2910,\n 1.1918, 1.0948, 1.3333, 1.5671, 1.7963, 1.6977, 1.6013, 1.5068, 1.4142,\n 1.6348, 1.5430, 1.4530, 1.3646, 1.2778, 1.1926, 1.1088, 1.0265, 0.9456,\n 0.8660, 0.7877, 0.9949, 0.9169, 0.8402, 0.7646, 0.6901, 0.6167, 0.5443,\n 0.7433, 0.9396, 1.1333, 1.0596, 0.9869, 1.1767, 1.1043, 1.2910, 1.2189,\n 1.4027, 1.3308, 1.5119, 1.4403, 1.3697, 1.2999, 1.2309, 1.1628, 1.0954,\n 1.0289, 0.9631, 0.8980, 0.8337, 1.0070, 1.1785, 1.1138, 1.0498, 0.9864,\n 0.9238, 0.8617, 0.8003, 0.7395, 0.6794, 0.6198, 0.5608, 0.7256, 0.6667,\n 0.8295, 0.7707, 0.7124, 0.6547, 0.5974, 0.7570, 0.6999, 0.6433, 0.8006,\n 0.7441, 0.6880, 0.6325, 0.7873, 0.7318, 0.6768, 0.6222, 0.5680, 0.7201,\n 0.6660, 0.6124, 0.5592, 0.5064, 0.4540, 0.4020, 0.5507, 0.4988, 0.4472,\n 0.5941, 0.5426, 0.6881, 0.6366, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.3797, 0.3311, 0.4714, 0.6108, 0.7493, 0.7001,\n 0.6513, 0.7884, 0.7396, 0.8755, 0.8268, 0.9615, 0.9129, 1.0465, 0.9979,\n 0.9497, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127, 0.6662, 0.6199, 0.5740,\n 0.5283, 0.4828, 0.4377, 0.5674, 0.6963, 0.6509, 0.6058, 0.5610, 0.5164,\n 0.4721, 0.4280, 0.3841, 0.3405, 0.4669, 0.5927, 0.7177, 0.6737, 0.7979,\n 0.7539, 0.7102, 0.6667, 0.7896, 0.7461, 0.7029, 0.6598, 0.7816, 0.9027,\n 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "44.4%", + "z-score": "1.35", + "p value": "0.089", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Grant worked hard to harvest his beans so he and his family would have enough to eat that winter, His friend Henry let him stack them in his barn where they would dry. Later, he and Tatyana would shell them and cook them for their Sunday dinners.\nWith pronoun replaced: Later, he and Tatyana would shell them and cook them for the beans' Sunday dinners.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 1.3472, 1.8257, 1.5667, 1.3333, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.3093, 1.1793, 1.4757, 1.3480, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 1.0954,\n 0.9918, 0.8909, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.7276, 0.6405, 0.5551, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.2520, -0.3131, -0.3735, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.1741, -0.2309, -0.2872, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.4444, -0.4977, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.7048, -0.7493,\n -0.7935, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.7641, -0.8066, -0.8489, -0.8909,\n -0.7550, -0.6199, -0.6623, -0.7044, -0.5706, -0.6128, -0.4801, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.5053, -0.3780, -0.4189, -0.4595, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.1650, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "99", + "Fraction of T in Greenlist": "49.7%", + "z-score": "8.06", + "p value": "3.73e-16", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 0.9802, 0.8165, 1.1921, 1.5492, 1.3859, 1.2309, 1.5650, 1.8856,\n 1.7321, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.8577,\n 2.7136, 2.5744, 2.4398, 2.3094, 2.5627, 2.8098, 3.0509, 2.9212,\n 3.1558, 3.0290, 2.9055, 2.7852, 3.0123, 3.2348, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.8431, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.0937, 4.2844, 4.4721, 4.6571, 4.5461, 4.7281, 4.6188,\n 4.5115, 4.4061, 4.5847, 4.7610, 4.9348, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.9666, 4.8667, 5.0332, 4.9346, 4.8375, 4.7419, 4.9058,\n 5.0679, 5.2281, 5.1332, 5.0395, 4.9472, 4.8561, 5.0138, 5.1698,\n 5.3243, 5.4772, 5.3865, 5.2970, 5.4480, 5.5976, 5.5088, 5.6569,\n 5.5690, 5.7155, 5.6285, 5.7735, 5.6874, 5.8310, 5.7457, 5.8878,\n 5.8034, 5.9442, 5.8605, 6.0000, 5.9171, 6.0553, 5.9732, 6.1101,\n 6.0287, 6.1644, 6.0837, 6.2183, 6.1382, 6.2716, 6.1923, 6.3246,\n 6.2459, 6.3770, 6.2990, 6.4291, 6.3517, 6.4807, 6.4039, 6.5320,\n 6.4558, 6.5828, 6.5072, 6.6332, 6.5582, 6.6833, 6.6088, 6.7330,\n 6.6591, 6.7823, 6.7089, 6.8313, 6.7584, 6.8799, 6.8075, 6.9282,\n 6.8563, 6.9762, 6.9048, 7.0238, 6.9529, 7.0711, 7.0006, 7.1181,\n 7.0481, 7.1647, 7.0952, 7.2111, 7.1420, 7.2572, 7.1885, 7.3030,\n 7.2348, 7.3485, 7.2807, 7.3937, 7.3263, 7.4386, 7.3717, 7.4833,\n 7.4167, 7.5277, 7.4615, 7.5719, 7.5061, 7.6158, 7.5503, 7.6594,\n 7.5944, 7.7028, 7.6381, 7.7460, 7.6816, 7.7889, 7.7249, 7.8316,\n 7.7679, 7.8740, 7.8107, 7.9162, 7.8533, 7.9582, 7.8956, 8.0000,\n 7.9377, 8.0416, 7.9796, 8.0829, 8.0212, 8.1240, 8.0627])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Beth didn't get angry with Sally, who had cut her off, because she stopped and apologized.\nWith pronoun replaced: Sally stopped and apologized.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "54", + "Fraction of T in Greenlist": "27.1%", + "z-score": "0.696", + "p value": "0.243", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.2265, 0.1111, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.7924, 0.6963, 0.6025, 0.5108, 0.4211, 0.6667,\n 0.5774, 0.4899, 0.4042, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.6901, 0.8907, 0.8165,\n 0.7433, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.6888, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.4815, 0.4191, 0.3573, 0.5331, 0.7071,\n 0.6448, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.4529,\n 0.3944, 0.5608, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.6999, 0.6433, 0.8006, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.7318, 0.6768, 0.6222, 0.5680, 0.5143, 0.4611, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.6030, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.4915, 0.4407, 0.3904, 0.5348, 0.4845, 0.6276, 0.7698,\n 0.9110, 0.8601, 0.8095, 0.7593, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.6912, 0.6430, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.5410, 0.4944, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.7971, 0.7506, 0.7044, 0.6584, 0.6128, 0.5674, 0.5222,\n 0.4774, 0.6058, 0.5610, 0.6885, 0.8154, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.7979, 0.7539, 0.8773, 0.8333,\n 0.7896, 0.7461, 0.7029, 0.8248, 0.7816, 0.7385, 0.6958])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 3.2206, 3.5382,\n 3.8411, 3.6148, 3.9056, 4.1851, 4.4543, 4.2426, 4.5033, 4.7556, 5.0000,\n 4.8008, 4.6101, 4.8488, 5.0811, 4.8990, 5.1257, 5.3468, 5.5626, 5.3886,\n 5.6000, 5.8068, 6.0093, 5.8424, 5.6805, 5.5234, 5.7229, 5.5705, 5.7664,\n 5.6183, 5.4740, 5.6667, 5.8560, 5.7155, 5.9017, 6.0849, 6.2651, 6.1283,\n 5.9944, 6.1721, 6.3472, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.1828,\n 6.3509, 6.5166, 6.3960, 6.5597, 6.7213, 6.8810, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.1143, 7.0000, 6.8876, 7.0401, 6.9294, 7.0803, 6.9714, 6.8641,\n 7.0133, 7.1611, 7.0553, 7.2016, 7.3464, 7.4897, 7.3855, 7.2827, 7.4247,\n 7.5653, 7.4639, 7.6033, 7.7414, 7.8782, 7.7782, 7.6794, 7.5818, 7.7174,\n 7.6210, 7.7555, 7.6603, 7.5661, 7.6995, 7.8318, 7.7387, 7.8699, 8.0000,\n 8.1291, 8.0370, 8.1651, 8.2923, 8.4184, 8.3274, 8.4526, 8.3625, 8.2733,\n 8.1850, 8.3093, 8.4327, 8.3453, 8.2588, 8.1731, 8.0882, 8.0042, 8.1266,\n 8.2483, 8.3691, 8.2858, 8.4057, 8.3231, 8.4423, 8.5607, 8.6783, 8.5964,\n 8.7133, 8.8294, 8.9448, 8.8636, 8.7831, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 8.8095, 8.9221, 8.8448, 8.9567, 9.0679, 9.1785, 9.1018,\n 9.0257, 8.9502, 8.8752, 8.8008, 8.9107, 9.0200, 9.1287, 9.0548, 9.1629,\n 9.0895, 9.1970, 9.3040, 9.4103, 9.3374, 9.2651, 9.1932, 9.1218, 9.0510,\n 9.1567, 9.2619, 9.3665, 9.2961, 9.4002, 9.3302, 9.4338, 9.5369, 9.6394,\n 9.5698, 9.5007, 9.4320, 9.3638, 9.2960, 9.3980, 9.4995, 9.6005, 9.5331,\n 9.6336, 9.5666, 9.6667, 9.7663, 9.8654, 9.7987, 9.7325, 9.6666, 9.6011,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: There is a pillar between me and the stage, and I can't see it.\nWith pronoun replaced: I can't see around the stage.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.9396, -1.0000, -0.7947, -0.5922, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.6376, -0.6971, -0.7559, -0.8141, -0.6226, -0.6809, -0.7385,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.8337, -0.8885, -0.9428,\n -0.9966, -1.0498, -0.8704, -0.9238, -0.9766, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.6667, -0.7189, -0.7707, -0.8220, -0.6547,\n -0.7061, -0.7570, -0.8076, -0.8577, -0.9074, -0.7441, -0.7939, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -1.0050, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -0.9759, -1.0211, -1.0659, -0.9173, -0.7698,\n -0.8151, -0.8601, -0.9048, -0.9492, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -0.8700, -0.9129,\n -0.9555, -0.9979, -1.0401, -1.0820, -1.1237, -0.9858, -1.0276, -1.0690,\n -0.9326, -0.9742, -1.0155, -1.0565, -1.0974, -0.9629, -1.0038, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "8", + "Fraction of T in Greenlist": "50.0%", + "z-score": "2.31", + "p value": "0.0105", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.9379, 2.3094])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The older students were bullying the younger ones, so we rescued them.\nWith pronoun replaced: We rescued the older students.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "38", + "Fraction of T in Greenlist": "19.1%", + "z-score": "-1.92", + "p value": "0.973", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.5554, -1.2247,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.5328, -1.6036, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.4697, -1.2127, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -1.9596, -2.0135, -2.0667, -2.1193, -2.1712, -1.9612, -2.0137, -2.0656,\n -2.1170, -2.1678, -1.9645, -2.0158, -2.0665, -2.1167, -2.1664, -2.2156,\n -2.0196, -2.0692, -2.1183, -2.1669, -2.2151, -2.2629, -2.3102, -2.1213,\n -2.1690, -2.2162, -2.0309, -1.8475, -1.8958, -1.9437, -1.9911, -2.0381,\n -2.0847, -2.1309, -2.1768, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.3150, -2.3586, -2.4019, -2.2323, -2.2758, -2.1082,\n -1.9420, -1.9863, -1.8220, -1.8665, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.8091, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -1.8676, -1.9098, -1.9518, -1.9935, -2.0349, -1.8829, -1.9245,\n -1.9658, -2.0068, -2.0476, -2.0881, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.0339, -2.0735, -2.1128, -1.9688, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.9985, -2.0369, -2.0751, -2.1131, -2.1509, -2.1884, -2.2258, -2.2630,\n -2.3000, -2.1637, -2.2008, -2.0656, -1.9311, -1.9686, -1.8352, -1.8728,\n -1.7404, -1.7780, -1.8155, -1.8527, -1.8898, -1.9267, -1.7964, -1.8333,\n -1.8701, -1.9068, -1.9432, -1.8145, -1.8511, -1.8874, -1.9236])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "133", + "Fraction of T in Greenlist": "66.8%", + "z-score": "13.6", + "p value": "1.35e-42", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.1312, 4.4096, 4.6775, 4.9358, 4.7140,\n 4.9652, 5.2085, 5.4444, 5.2372, 5.0389, 5.2705, 5.4958, 5.3072,\n 5.5277, 5.7429, 5.9530, 5.7735, 5.9797, 6.1815, 6.3791, 6.2075,\n 6.0412, 6.2361, 6.4273, 6.2668, 6.4550, 6.2993, 6.4846, 6.6667,\n 6.8457, 6.6953, 6.8718, 7.0456, 7.2168, 7.0711, 6.9286, 7.0980,\n 7.2648, 7.1261, 6.9903, 7.1554, 7.3183, 7.1857, 7.3467, 7.5056,\n 7.6624, 7.5331, 7.6883, 7.8416, 7.9931, 7.8667, 8.0167, 8.1650,\n 8.3116, 8.1881, 8.0667, 8.2121, 8.3560, 8.2369, 8.3795, 8.2624,\n 8.4037, 8.5435, 8.6820, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.7515, 8.8853, 9.0179, 8.9086, 9.0401, 9.1706, 9.2999, 9.1924,\n 9.3207, 9.4480, 9.5743, 9.4685, 9.5939, 9.7183, 9.8416, 9.7376,\n 9.8601, 9.9817, 10.1024, 10.0000, 9.8987, 10.0188, 10.1379, 10.0380,\n 10.1564, 10.0577, 10.1754, 10.2923, 10.4083, 10.3110, 10.4263, 10.5409,\n 10.6547, 10.5587, 10.4636, 10.5769, 10.6894, 10.5955, 10.7074, 10.6145,\n 10.7257, 10.8363, 10.9462, 10.8544, 10.9637, 11.0724, 11.1803, 11.0897,\n 11.1971, 11.3039, 11.4101, 11.3204, 11.4261, 11.5311, 11.6356, 11.5470,\n 11.4592, 11.5632, 11.6667, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 11.8172, 11.9181, 12.0185, 12.1184, 12.0341, 12.1335, 12.2325,\n 12.3309, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.3754, 12.4722,\n 12.5685, 12.4872, 12.5831, 12.5024, 12.5979, 12.6930, 12.7876, 12.7077,\n 12.8019, 12.8957, 12.9891, 12.9099, 13.0030, 13.0956, 13.1878, 13.1094,\n 13.2012, 13.2927, 13.3838, 13.3060, 13.2288, 13.3196, 13.4100, 13.3333,\n 13.4234, 13.3473, 13.4371, 13.5265, 13.6155, 13.5401, 13.6288])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I'm sure that my map will show this building; it is very famous.\nWith pronoun replaced: The building is very famous.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.0676, 0.1342, 0.0667, 0.0000, -0.0658, -0.1307, -0.1949, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.7189, -0.5505, -0.6028, -0.6547,\n -0.7061, -0.5407, -0.5922, -0.6433, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.4988, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.4845, -0.5311, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.7493,\n -0.7935, -0.8374, -0.8811, -0.9245, -0.9676, -1.0105, -1.0531, -1.0954,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -0.9742, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.9925, -0.8607, -0.9012, -0.9415, -0.9816, -1.0215,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -1.0890, -1.1279, -1.0000,\n -1.0390, -1.0777, -0.9509, -0.8248, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 1.3333, 1.1209, 0.9258, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.7505, 1.0541, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.4398, 2.3094, 2.1831, 2.4351, 2.6811, 2.5560,\n 2.4345, 2.6726, 2.5533, 2.7852, 3.0123, 2.8943, 2.7791, 3.0000,\n 3.2167, 3.4293, 3.6380, 3.5228, 3.4101, 3.6141, 3.8146, 3.7033,\n 3.5942, 3.7905, 3.6831, 3.8759, 3.7700, 3.9595, 4.1461, 4.0415,\n 4.2251, 4.4061, 4.3026, 4.2008, 4.1008, 4.2784, 4.1797, 4.0825,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.7025, 3.8730,\n 3.7849, 3.6979, 3.6122, 3.7796, 3.9452, 4.1090, 4.2710, 4.4313,\n 4.5899, 4.5035, 4.6603, 4.8154, 4.9691, 5.1212, 5.0350, 5.1855,\n 5.3345, 5.4822, 5.3964, 5.3116, 5.4576, 5.6023, 5.5181, 5.4349,\n 5.3526, 5.4956, 5.6373, 5.5556, 5.4747, 5.6149, 5.7540, 5.8919,\n 5.8114, 5.9481, 6.0837, 6.2183, 6.3517, 6.2716, 6.4040, 6.5354,\n 6.6658, 6.5861, 6.5072, 6.6365, 6.7648, 6.6865, 6.6089, 6.5320,\n 6.6591, 6.5828, 6.7090, 6.8343, 6.7585, 6.8828, 7.0063, 6.9310,\n 6.8564, 6.7823, 6.9048, 6.8313, 6.7584, 6.6861, 6.6144, 6.5433,\n 6.4728, 6.5939, 6.5238, 6.4543, 6.5745, 6.5054, 6.4368, 6.3688,\n 6.4880, 6.6064, 6.7242, 6.8413, 6.9577, 6.8897, 7.0054, 6.9378,\n 7.0527, 7.1670, 7.2807, 7.3937, 7.3263, 7.4386, 7.5504, 7.6615,\n 7.7720, 7.8820, 7.8147, 7.7480, 7.8572, 7.9659, 7.8995, 8.0076,\n 7.9415, 8.0490, 8.1560, 8.2624, 8.3683, 8.3024, 8.4078, 8.5126,\n 8.6169, 8.7207, 8.8240, 8.7584, 8.8612, 8.9635, 9.0653, 9.0000,\n 9.1013, 9.2022, 9.3026, 9.2376, 9.3375, 9.4370, 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like telephone poles.\nWith pronoun replaced: The lemons came out looking more like telephone poles.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -0.6667, -0.8006, -0.9258, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.8729, -0.5361, -0.6325, -0.7259, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.5659, -0.6405, -0.7137, -0.4714, -0.5449, -0.6172,\n -0.3824, -0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.1421, -0.2116, -0.2801, -0.3475, -0.4140, -0.4796, -0.5443,\n -0.6082, -0.6712, -0.4667, -0.5298, -0.3290, -0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.5388, -0.5955, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.7468, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.5922, -0.4288, -0.4804, -0.3189, -0.1588, 0.0000,\n 0.1575, 0.3136, 0.2603, 0.2074, 0.1549, 0.1029, 0.0512, 0.0000,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.1491, 0.0000,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.3797, -0.4257, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.2791, -0.3246, -0.3698, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.6351, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.5345,\n -0.5774, -0.6199, -0.6623, -0.7044, -0.5706, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.6885, -0.7295, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.7620, -0.8022, -0.6737, -0.5459, -0.4189, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "105", + "Fraction of T in Greenlist": "52.8%", + "z-score": "9.04", + "p value": "7.49e-20", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 4.0119, 4.3231, 4.6188,\n 4.9010, 4.6268, 4.3710, 4.1312, 3.9056, 3.6927, 3.4912, 3.2998,\n 3.5796, 3.8497, 3.6667, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 5.1257, 5.3468, 5.5626, 5.7735, 5.9797, 6.1815, 6.0093, 5.8424,\n 5.6805, 5.5234, 5.3708, 5.2223, 5.0779, 5.2778, 5.4740, 5.3333,\n 5.1962, 5.3889, 5.5783, 5.4444, 5.3134, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.4870, 5.3666, 5.5442, 5.4259, 5.6009, 5.7735,\n 5.6573, 5.5432, 5.4312, 5.6011, 5.7689, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 6.0928, 6.2505, 6.4065, 6.3008, 6.4550,\n 6.3509, 6.5033, 6.4006, 6.5514, 6.7006, 6.8483, 6.7469, 6.8931,\n 7.0379, 7.1813, 7.0812, 7.2232, 7.3638, 7.5032, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.7555, 7.8889, 7.7937, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.1291, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.3274, 8.4526, 8.5769, 8.7003, 8.8228, 8.9444, 9.0652,\n 9.1851, 9.3042, 9.4225, 9.5400, 9.4501, 9.5668, 9.4778, 9.3897,\n 9.3024, 9.2159, 9.1302, 9.0453, 9.1615, 9.0773, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 9.0923, 9.0117, 8.9319, 9.0452,\n 8.9660, 8.8874, 8.8095, 8.7323, 8.6556, 8.5796, 8.5041, 8.4293,\n 8.3550, 8.2813, 8.2082, 8.1356, 8.0636, 7.9921, 8.1043, 8.0333,\n 7.9628, 8.0742, 8.1851, 8.2954, 8.4050, 8.5141, 8.6226, 8.5524,\n 8.6603, 8.7676, 8.8744, 8.8045, 8.7351, 8.6662, 8.5978, 8.7039,\n 8.6359, 8.7414, 8.8464, 8.7788, 8.7116, 8.6448, 8.5785, 8.5126,\n 8.6169, 8.7207, 8.8240, 8.7584, 8.6932, 8.7959, 8.8982, 9.0000,\n 8.9351, 9.0364, 8.9718, 8.9077, 8.8439, 8.9446, 9.0449])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.\nWith pronoun replaced: He could not help Larry now.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 1.1209, 1.5430, 1.3416, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 0.5556, 0.4364, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.4508, -0.1782, -0.2641, -0.3482, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.0825, -0.1633, -0.2425, -0.3203, -0.3965, -0.1571, -0.2335, 0.0000,\n -0.0765, 0.1516, 0.3758, 0.2981, 0.5175, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, 0.2085, 0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.8141, -0.8716, -0.9285, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.7223, -0.7783, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.9238, -0.9766, -1.0290, -1.0809, -1.1323,\n -1.1832, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.2383, -1.2865, -1.1209, -0.9567, -1.0056, -0.8433,\n -0.8923, -0.9409, -0.9891, -1.0370, -0.8779, -0.9258, -0.7685, -0.6124,\n -0.6608, -0.7089, -0.5549, -0.6030, -0.6508, -0.6983, -0.5466, -0.3961,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.8041, -0.8485, -0.8927, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.3035, -1.3443, -1.2063, -1.2472,\n -1.2879, -1.3284, -1.3687, -1.2326, -1.0974, -1.1380, -1.1784, -1.2185,\n -1.2585, -1.2982, -1.1651, -1.0328, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.0612, -1.1007, -0.9711, -0.8422, -0.8819, -0.9215, -0.7937, -0.8333,\n -0.8727, -0.9119, -0.7856, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.1209, 1.5430, 1.3416, 1.1547,\n 1.5403, 1.3608, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.1177, 3.3968, 3.2222, 3.0551, 3.3235, 3.5839, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 5.7664, 5.9588, 6.1477, 6.3333,\n 6.1859, 6.0421, 5.9017, 5.7646, 5.6307, 5.4997, 5.6830, 5.5549,\n 5.7354, 5.6099, 5.4870, 5.3666, 5.5442, 5.7192, 5.6009, 5.4848,\n 5.6573, 5.8275, 5.9954, 6.1612, 6.3249, 6.4866, 6.6463, 6.8041,\n 6.6898, 6.5773, 6.7333, 6.8876, 6.7769, 6.9294, 7.0803, 7.2296,\n 7.1207, 7.2684, 7.4146, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.8074, 7.9455, 7.8428, 7.9796, 8.1152, 8.0139,\n 7.9138, 8.0483, 8.1816, 8.3138, 8.4449, 8.5749, 8.7039, 8.8318,\n 8.7333, 8.6359, 8.7629, 8.8889, 8.7927, 8.9178, 9.0419, 9.1652,\n 9.0702, 9.1925, 9.3140, 9.2202, 9.3408, 9.2480, 9.3678, 9.4868,\n 9.6050, 9.7224, 9.6307, 9.7473, 9.8632, 9.7725, 9.8877, 9.7980,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 10.8423, 10.9473, 10.8644,\n 10.9689, 11.0728, 11.1761, 11.2789, 11.1968, 11.2992, 11.4009, 11.3196,\n 11.4209, 11.3402, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.9741, 11.8956, 11.8176, 11.9147, 12.0114,\n 11.9340, 12.0302, 12.1260, 12.2214, 12.1447, 12.2397, 12.3342, 12.2581,\n 12.3523, 12.2767, 12.3705, 12.4638, 12.5568, 12.6494, 12.5745, 12.6667,\n 12.7585, 12.6841, 12.7756, 12.7017, 12.6283, 12.7195, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Bernard, who had not told the government official that he was less than 21 when he filed for a homestead claim, did not consider that he had done anything dishonest. Still, anyone who knew that he was 19 years old could take his claim away from him.\nWith pronoun replaced: Bernard was less than 21 when he filed for a homestead claim.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "59", + "Fraction of T in Greenlist": "29.6%", + "z-score": "1.51", + "p value": "0.065", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.0911, 1.6330, 1.3472,\n 1.0954, 1.5667, 2.0000, 2.4019, 2.1602, 2.5342, 2.3094, 2.1004, 1.9052,\n 1.7219, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142, 1.2702, 1.1323, 1.4444,\n 1.3093, 1.1793, 1.4757, 1.7628, 2.0412, 1.9096, 2.1783, 2.0494, 2.3094,\n 2.1831, 2.0605, 1.9415, 2.1909, 2.0738, 1.9599, 1.8489, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.6667, 1.5671, 1.7963, 1.6977, 1.6013, 1.8240, 2.0428,\n 2.2576, 2.1602, 2.3706, 2.2743, 2.1798, 2.0870, 1.9959, 1.9064, 1.8185,\n 1.7321, 1.6471, 1.5635, 1.4812, 1.4003, 1.5986, 1.7942, 1.7130, 1.6330,\n 1.8245, 1.7450, 1.6667, 1.5894, 1.5133, 1.4382, 1.3641, 1.2910, 1.2189,\n 1.4027, 1.3308, 1.2599, 1.1898, 1.1206, 1.2999, 1.4771, 1.4076, 1.3389,\n 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.1785, 1.1138, 1.0498, 0.9864,\n 1.1547, 1.0915, 1.0290, 1.1946, 1.3587, 1.5213, 1.4580, 1.6186, 1.5556,\n 1.7143, 1.6514, 1.5892, 1.5275, 1.6837, 1.6222, 1.5613, 1.5010, 1.4412,\n 1.3819, 1.3231, 1.2649, 1.4171, 1.3590, 1.5097, 1.4517, 1.3943, 1.5430,\n 1.6906, 1.8371, 1.7792, 1.9242, 1.8664, 1.8091, 1.7522, 1.6958, 1.6398,\n 1.5842, 1.5291, 1.4744, 1.4201, 1.3663, 1.3128, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.6246, 1.5714, 1.5187, 1.4662, 1.4142, 1.3625, 1.3112, 1.2603,\n 1.2096, 1.3448, 1.2943, 1.2441, 1.1942, 1.1447, 1.2780, 1.4105, 1.3608,\n 1.3114, 1.2623, 1.2136, 1.3443, 1.2956, 1.2472, 1.1991, 1.1513, 1.1038,\n 1.0565, 1.1852, 1.1380, 1.0911, 1.2185, 1.3453, 1.4713, 1.4241, 1.5492,\n 1.5020, 1.6262, 1.5791, 1.5323, 1.4857, 1.6087, 1.5621, 1.5159, 1.4699,\n 1.4241, 1.3786, 1.3333, 1.2883, 1.4093, 1.3644, 1.4846, 1.4397, 1.3950,\n 1.5143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "160", + "# Tokens in Greenlist": "106", + "Fraction of T in Greenlist": "66.2%", + "z-score": "12", + "p value": "9.71e-34", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.6829, 3.3947, 3.7268, 4.0415,\n 4.3409, 4.6268, 4.3710, 4.1312, 3.9056, 4.1851, 4.4543, 4.2426,\n 4.0415, 4.3027, 4.5556, 4.3644, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.3886, 5.6000, 5.4322, 5.6395, 5.8424,\n 5.6805, 5.8797, 5.7229, 5.5705, 5.4222, 5.2778, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 6.0413, 6.2164, 6.3890, 6.5591, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.8031, 6.6803, 6.8419, 6.7213, 6.6030, 6.7626, 6.9204, 7.0763,\n 7.2304, 7.3827, 7.5333, 7.4174, 7.5664, 7.7139, 7.8598, 8.0042,\n 8.1471, 8.2885, 8.4285, 8.5672, 8.7045, 8.8405, 8.9753, 8.8626,\n 8.9963, 9.1287, 9.0179, 8.9086, 9.0401, 9.1706, 9.0629, 9.1924,\n 9.0863, 8.9815, 8.8780, 9.0067, 9.1343, 9.2609, 9.3865, 9.5111,\n 9.6348, 9.7574, 9.8792, 10.0000, 9.8987, 10.0188, 9.9187, 10.0380,\n 9.9392, 9.8414, 9.7447, 9.8634, 9.7678, 9.8858, 10.0029, 10.1193,\n 10.2348, 10.3496, 10.4636, 10.5769, 10.6894, 10.5955, 10.7074, 10.8186,\n 10.9291, 10.8363, 10.9462, 10.8544, 10.7635, 10.8729, 10.9816, 10.8916,\n 10.9998, 11.1073, 11.2142, 11.3204, 11.4261, 11.3373, 11.4425, 11.3546,\n 11.4592, 11.3721, 11.4762, 11.5797, 11.6827, 11.5966, 11.6990, 11.8010,\n 11.9024, 12.0032, 12.1036, 12.0185, 12.1184, 12.0341, 12.1335, 12.0499])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The drain is clogged with hair. It has to be cleaned.\nWith pronoun replaced: The drain has to be cleaned.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "20", + "Fraction of T in Greenlist": "10.1%", + "z-score": "-4.87", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.5323, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -2.2549, -2.0207,\n -2.0767, -2.1320, -2.1866, -2.2404, -2.2937, -2.3462, -2.3982, -2.4495,\n -2.5002, -2.5504, -2.6000, -2.3842, -2.4344, -2.4841, -2.5333, -2.5820,\n -2.6302, -2.6778, -2.7250, -2.7717, -2.8180, -2.8638, -2.9092, -2.7080,\n -2.7540, -2.7995, -2.8446, -2.8893, -2.9336, -2.9775, -3.0210, -3.0641,\n -3.1069, -3.1493, -3.1914, -3.0022, -3.0448, -3.0870, -3.1288, -3.1704,\n -3.2116, -3.2525, -3.2931, -3.3333, -3.3733, -3.4130, -3.4524, -3.2733,\n -3.3131, -3.3526, -3.3918, -3.4308, -3.4694, -3.5079, -3.5460, -3.5839,\n -3.6216, -3.6590, -3.6961, -3.5256, -3.5631, -3.6004, -3.6374, -3.6742,\n -3.7108, -3.7471, -3.7832, -3.8191, -3.8548, -3.8903, -3.9255, -3.7626,\n -3.7981, -3.8335, -3.8686, -3.9036, -3.9384, -3.9729, -4.0073, -4.0415,\n -4.0754, -4.1092, -4.1429, -3.9865, -4.0204, -4.0541, -4.0876, -4.1210,\n -4.1542, -4.1872, -4.2200, -4.2527, -4.2852, -4.3176, -4.3498, -4.1992,\n -4.2316, -4.2639, -4.2960, -4.3280, -4.3598, -4.3915, -4.4230, -4.4544,\n -4.4856, -4.5166, -4.5476, -4.4023, -4.4334, -4.4644, -4.4953, -4.5260,\n -4.5566, -4.5871, -4.6174, -4.6476, -4.6776, -4.7076, -4.7374, -4.5968,\n -4.6268, -4.6567, -4.6864, -4.7161, -4.7456, -4.7749, -4.8042, -4.8333,\n -4.8624, -4.8913, -4.9200, -4.7838, -4.8127, -4.8416, -4.8703])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "127", + "Fraction of T in Greenlist": "63.8%", + "z-score": "12.6", + "p value": "5.85e-37", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 0.8083, 0.6794, 1.0000, 0.8729, 0.7505, 1.0541, 1.3480, 1.6330,\n 1.9096, 2.1783, 2.0494, 2.3094, 2.5627, 2.8098, 3.0509, 3.2863,\n 3.5165, 3.3853, 3.2577, 3.4816, 3.7009, 3.9158, 3.7897, 4.0000,\n 3.8765, 4.0825, 4.2848, 4.4836, 4.6790, 4.8712, 5.0602, 4.9377,\n 5.1236, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 6.2776, 6.4413, 6.6030, 6.7626, 6.6463, 6.8041,\n 6.9601, 7.1143, 7.0000, 6.8876, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.8641, 6.7583, 6.9076, 7.0553, 6.9511, 7.0973, 7.2421, 7.1393,\n 7.0379, 7.1813, 7.0812, 6.9824, 7.1243, 7.2650, 7.4044, 7.5425,\n 7.6794, 7.8150, 7.9495, 7.8520, 7.9853, 7.8889, 8.0212, 7.9259,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.4774, 8.6035, 8.5105,\n 8.4184, 8.5437, 8.4526, 8.5769, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.7652, 8.8860, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.3024, 9.4185, 9.5338, 9.6484, 9.7622, 9.6758, 9.7890, 9.7034,\n 9.8159, 9.9278, 10.0389, 10.1494, 10.2592, 10.1745, 10.0906, 10.1999,\n 10.1167, 10.2253, 10.3333, 10.2509, 10.3583, 10.4652, 10.3835, 10.4898,\n 10.4087, 10.5145, 10.6196, 10.7242, 10.8282, 10.9317, 11.0346, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.3610, 11.2816, 11.3820, 11.4819, 11.5813,\n 11.6802, 11.7787, 11.8766, 11.7980, 11.8956, 11.9927, 12.0893, 12.1854,\n 12.2812, 12.3764, 12.4713, 12.3935, 12.4880, 12.5820, 12.6757, 12.5986,\n 12.5221, 12.4460, 12.5394, 12.4638, 12.3888, 12.4818, 12.4074, 12.5000,\n 12.5923, 12.5183, 12.6102, 12.7017, 12.6283, 12.5553, 12.6465])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The drain is clogged with hair. It has to be removed.\nWith pronoun replaced: The drain has to be removed.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "21", + "Fraction of T in Greenlist": "10.6%", + "z-score": "-4.71", + "p value": "1", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.4791, -1.5430,\n -1.6059, -1.6678, -1.7288, -1.7889, -1.8481, -1.9064, -1.9640, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.1193, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.4228, -2.4715, -2.5198, -2.5675, -2.6148, -2.6616, -2.4618,\n -2.5092, -2.5560, -2.6025, -2.6485, -2.6941, -2.7393, -2.7840, -2.8284,\n -2.8724, -2.9161, -2.9593, -2.7713, -2.8150, -2.8583, -2.9013, -2.9439,\n -2.9862, -3.0282, -3.0698, -3.1111, -3.1521, -3.1928, -3.2332, -3.0551,\n -3.0958, -3.1363, -3.1765, -3.2163, -3.2559, -3.2953, -3.3343, -3.3731,\n -3.4116, -3.4499, -3.4879, -3.3182, -3.3566, -3.3947, -3.4325, -3.4701,\n -3.5075, -3.5446, -3.5815, -3.6181, -3.6546, -3.6908, -3.7268, -3.5645,\n -3.6008, -3.6369, -3.6728, -3.7084, -3.7439, -3.7791, -3.8142, -3.8490,\n -3.8837, -3.9181, -3.9524, -3.7966, -3.8312, -3.8655, -3.8997, -3.9337,\n -3.9675, -4.0011, -4.0345, -4.0678, -4.1009, -4.1338, -4.1666, -4.0166,\n -4.0496, -4.0825, -4.1152, -4.1477, -4.1800, -4.2122, -4.2443, -4.2762,\n -4.3079, -4.3395, -4.3710, -4.2262, -4.2578, -4.2893, -4.3207, -4.3519,\n -4.3830, -4.4140, -4.4448, -4.4754, -4.5060, -4.5364, -4.5666, -4.4265,\n -4.4570, -4.4873, -4.5175, -4.5476, -4.5776, -4.6074, -4.6371, -4.6667,\n -4.6961, -4.7255, -4.7547, -4.6188, -4.6482, -4.6775, -4.7066])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "111", + "Fraction of T in Greenlist": "55.8%", + "z-score": "10", + "p value": "5.79e-24", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.3608, 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284,\n 2.6558, 2.9439, 3.2222, 3.0551, 3.3235, 3.5839, 3.4219, 3.2660,\n 3.5176, 3.7626, 4.0012, 3.8490, 4.0814, 3.9337, 3.7905, 4.0166,\n 4.2378, 4.0980, 3.9620, 3.8297, 3.7009, 3.5753, 3.4528, 3.3333,\n 3.2167, 3.4293, 3.6380, 3.8431, 3.7273, 3.6141, 3.5032, 3.7033,\n 3.9001, 4.0937, 4.2844, 4.4721, 4.6571, 4.8394, 5.0190, 5.1962,\n 5.3709, 5.2590, 5.4312, 5.6011, 5.4909, 5.6585, 5.8241, 5.9876,\n 5.8789, 5.7719, 5.9333, 6.0928, 6.2505, 6.1450, 6.3008, 6.1968,\n 6.0943, 6.2483, 6.4006, 6.2994, 6.1996, 6.1012, 6.0041, 5.9084,\n 5.8139, 5.7207, 5.6286, 5.7785, 5.9270, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.9488, 6.0927, 6.2354, 6.3768, 6.2883, 6.2008, 6.3408,\n 6.4795, 6.6171, 6.7536, 6.8889, 6.8019, 6.9361, 7.0692, 6.9830,\n 7.1149, 7.2459, 7.3758, 7.2904, 7.2058, 7.3346, 7.4625, 7.5895,\n 7.5056, 7.6315, 7.5484, 7.4661, 7.5910, 7.7152, 7.8384, 7.7567,\n 7.8791, 7.7981, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.1192,\n 8.2375, 8.1585, 8.2760, 8.3927, 8.5088, 8.4303, 8.5456, 8.4678,\n 8.5824, 8.6963, 8.8095, 8.9221, 8.8448, 8.9567, 9.0679, 8.9912,\n 8.9151, 9.0257, 8.9502, 8.8752, 8.9851, 9.0944, 9.0200, 9.1287,\n 9.2368, 9.1629, 9.2704, 9.3774, 9.4837, 9.4103, 9.5161, 9.4432,\n 9.5485, 9.6532, 9.7574, 9.6850, 9.6130, 9.7167, 9.6452, 9.5743,\n 9.5038, 9.6069, 9.5369, 9.6394, 9.7415, 9.8431, 9.7735, 9.7043,\n 9.8054, 9.9060, 10.0061, 9.9374, 10.0371, 9.9687, 10.0679, 10.1667,\n 10.2650, 10.1970, 10.1295, 10.2273, 10.1602, 10.0935, 10.0272])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Emma did not pass the ball to Janie although she was open.\nWith pronoun replaced: She saw that Emma was open.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.5895, -0.6667,\n -0.4124, -0.1633, -0.2425, -0.3203, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.1466, -0.2182, -0.2887,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.2582,\n -0.0642, -0.1275, -0.1901, 0.0000, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.1217, -0.1816, -0.2408, -0.0599, -0.1191, -0.1777, -0.2357,\n -0.2931, -0.3499, -0.4062, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.2817, -0.1122, -0.1674, -0.2222, -0.2765, -0.1101, 0.0548, 0.0000,\n -0.0543, 0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.4439, -0.4915, -0.3428, -0.3904, -0.4376, -0.4845, -0.5311, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.4714, -0.5168, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.5990, -0.6430, -0.6868, -0.5477,\n -0.4095, -0.4536, -0.4974, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.4428, -0.3091, -0.3522, -0.3951, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.4233, -0.4644, -0.5053, -0.3780, -0.2513, -0.2924, -0.3333,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.2057, -0.2462, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "116", + "Fraction of T in Greenlist": "58.3%", + "z-score": "10.8", + "p value": "1.04e-27", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.0825,\n 4.3217, 4.1586, 4.3916, 4.2339, 4.4610, 4.6829, 4.8999, 5.1121,\n 4.9592, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.9479, 5.8140, 5.6830, 5.5549,\n 5.4295, 5.3067, 5.4870, 5.6647, 5.8398, 5.7192, 5.6009, 5.7735,\n 5.9438, 6.1118, 5.9954, 6.1612, 6.3249, 6.4866, 6.3723, 6.5320,\n 6.6898, 6.8458, 7.0000, 7.1525, 7.0401, 7.1909, 7.3401, 7.2296,\n 7.1207, 7.0133, 7.1611, 7.3073, 7.4521, 7.3464, 7.4897, 7.6317,\n 7.5275, 7.6681, 7.5653, 7.4639, 7.3638, 7.5032, 7.6413, 7.7782,\n 7.6794, 7.8150, 7.7174, 7.6210, 7.7555, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 7.8699, 8.0000, 7.9079, 7.8168, 7.7268, 7.6376,\n 7.7667, 7.8948, 8.0219, 8.1481, 8.2733, 8.3976, 8.3093, 8.4327,\n 8.5553, 8.4679, 8.5896, 8.5030, 8.4173, 8.5381, 8.6581, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 8.9612, 8.8778, 8.9940, 8.9113,\n 9.0267, 9.1414, 9.0595, 8.9783, 8.8978, 8.8179, 8.7388, 8.6603,\n 8.5824, 8.6963, 8.8095, 8.9221, 9.0340, 8.9567, 9.0679, 9.1785,\n 9.1018, 9.2118, 9.1357, 9.0601, 8.9851, 8.9107, 9.0200, 8.9461,\n 9.0548, 9.1629, 9.0895, 9.1970, 9.1242, 9.0518, 9.1587, 9.2651,\n 9.3708, 9.2990, 9.2276, 9.3328, 9.4375, 9.3665, 9.4707, 9.5743,\n 9.6774, 9.6069, 9.7095, 9.8116, 9.7415, 9.8431, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.4427, 10.5410, 10.6389, 10.7363, 10.6667,\n 10.7637, 10.8602, 10.7910, 10.8872, 10.8184, 10.9141, 10.8457])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: John was doing research in the library when he heard a man humming and whistling. He was very annoying.\nWith pronoun replaced: John was very annoying.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.8704, 0.6667, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.5396, -1.6136, -1.6859, -1.7566, -1.4606,\n -1.5328, -1.6036, -1.6729, -1.7408, -1.8074, -1.8728, -1.6001, -1.6667,\n -1.7321, -1.7963, -1.8594, -1.6013, -1.6654, -1.7285, -1.7905, -1.8516,\n -1.9118, -1.9711, -2.0294, -2.0870, -2.1437, -2.1997, -1.9640, -2.0207,\n -2.0767, -1.8477, -1.6223, -1.6803, -1.4596, -1.5181, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.4940, -1.5492,\n -1.6038, -1.6577, -1.7111, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.5823, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.4796, -1.5275,\n -1.5751, -1.4059, -1.4536, -1.5010, -1.5479, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.8220, -1.6591, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.7217, -1.7655, -1.8091, -1.6521, -1.6958, -1.5404, -1.3862,\n -1.4305, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.5822, -1.6246, -1.6667, -1.7085, -1.7500, -1.7913, -1.8324, -1.8732,\n -1.9137, -1.9540, -1.9941, -1.8490, -1.8892, -1.9291, -1.7857, -1.6432,\n -1.6836, -1.5423, -1.5828, -1.4427, -1.4832, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.4087, -1.4485, -1.4881, -1.5275, -1.3926,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.4551, -1.3230, -1.3620,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.3859, -1.4241, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "103", + "Fraction of T in Greenlist": "51.8%", + "z-score": "8.72", + "p value": "1.42e-18", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547, 1.5403, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142, 1.2702, 1.1323, 1.0000,\n 1.3093, 1.6082, 1.8974, 1.7628, 1.6330, 1.9096, 2.1783, 2.0494, 1.9245,\n 1.8034, 1.6859, 1.9415, 1.8257, 1.7132, 1.6036, 1.4968, 1.7408, 1.9795,\n 2.2133, 2.4422, 2.6667, 2.5568, 2.4495, 2.3445, 2.2418, 2.1412, 2.0428,\n 2.2576, 2.1602, 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.4004,\n 2.3094, 2.5064, 2.7005, 2.8919, 3.0806, 3.2667, 3.4503, 3.3574, 3.5382,\n 3.7166, 3.8927, 4.0667, 3.9736, 4.1451, 4.0531, 4.2222, 4.3894, 4.2981,\n 4.4630, 4.6262, 4.5356, 4.4462, 4.6070, 4.5186, 4.6775, 4.8347, 4.9904,\n 5.1444, 5.0562, 5.2086, 5.3594, 5.5088, 5.6569, 5.8035, 5.7155, 5.6285,\n 5.5426, 5.4576, 5.6023, 5.7457, 5.8878, 5.8034, 5.7199, 5.8605, 5.7778,\n 5.9171, 5.8351, 5.7540, 5.6737, 5.5942, 5.7318, 5.8684, 6.0038, 6.1382,\n 6.2716, 6.4040, 6.3246, 6.2459, 6.3770, 6.2990, 6.2217, 6.3517, 6.4807,\n 6.6089, 6.7361, 6.6591, 6.7854, 6.7090, 6.6332, 6.5582, 6.6833, 6.8076,\n 6.9310, 7.0537, 6.9789, 7.1007, 7.0265, 6.9529, 7.0737, 7.0007, 6.9282,\n 7.0481, 7.1673, 7.2857, 7.4034, 7.5204, 7.6368, 7.7524, 7.6800, 7.7949,\n 7.9091, 7.8372, 7.7658, 7.6950, 7.6246, 7.7380, 7.6681, 7.5988, 7.5299,\n 7.4616, 7.5740, 7.6859, 7.7971, 7.9078, 8.0178, 7.9497, 7.8820, 7.8147,\n 7.7480, 7.6816, 7.6158, 7.7249, 7.6594, 7.7679, 7.7028, 7.6381, 7.5738,\n 7.6816, 7.7889, 7.7249, 7.6613, 7.7679, 7.8740, 7.9796, 8.0847, 8.1892,\n 8.2933, 8.2298, 8.3333, 8.4364, 8.5390, 8.6411, 8.5778, 8.6794, 8.6164,\n 8.7175])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The city councilmen refused the demonstrators a permit because they advocated violence.\nWith pronoun replaced: The demonstrators advocated violence.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "31", + "Fraction of T in Greenlist": "15.6%", + "z-score": "-3.07", + "p value": "0.999", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, -0.3086, 0.1491, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, 0.1037, 0.4082,\n 0.3015, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, -0.1782, 0.0880, 0.0000, -0.0861, 0.1703, 0.0842, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.4937, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.5656, -1.6187, -1.6713, -1.7233,\n -1.7748, -1.8257, -1.8762, -1.9262, -1.9757, -2.0247, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.0309, -2.0785, -2.1256, -1.9437, -1.9911, -2.0381,\n -2.0847, -1.9066, -1.9535, -2.0000, -2.0461, -2.0918, -2.1372, -2.1822,\n -2.2268, -2.2711, -2.0997, -2.1442, -2.1884, -2.2323, -2.2758, -2.3190,\n -2.3619, -2.1954, -2.2385, -2.0739, -2.1172, -2.1602, -1.9980, -2.0412,\n -2.0841, -1.9242, -1.9673, -2.0101, -2.0526, -2.0948, -2.1367, -2.1783,\n -2.0224, -2.0642, -2.1057, -2.1470, -2.1880, -2.2287, -2.2692, -2.3094,\n -2.3494, -2.3891, -2.2381, -2.2780, -2.3176, -2.3570, -2.3962, -2.4351,\n -2.4738, -2.5123, -2.3651, -2.4037, -2.4421, -2.4803, -2.5183, -2.5560,\n -2.5936, -2.6309, -2.6681, -2.7050, -2.7417, -2.7783, -2.8146, -2.8508,\n -2.8868, -2.9225, -2.7815, -2.8174, -2.8532, -2.8887, -2.9241, -2.9593,\n -2.9943, -2.8561, -2.8913, -2.7541, -2.7894, -2.8245, -2.6888, -2.7240,\n -2.7591, -2.7940, -2.8287, -2.8633, -2.8977, -2.9320, -2.9661, -3.0000,\n -3.0338, -2.9016, -2.9355, -2.9692, -3.0028, -3.0363, -3.0695])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "142", + "Fraction of T in Greenlist": "71.4%", + "z-score": "15.1", + "p value": "7.84e-52", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.7454, 1.1547,\n 1.5403, 1.9052, 1.7219, 2.0656, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.6558, 2.9439, 2.7778, 3.0551, 3.3235, 3.5839, 3.4219, 3.6742,\n 3.9196, 4.1586, 4.0012, 4.2339, 4.4610, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.6667,\n 5.8560, 6.0421, 5.9017, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.8229, 6.6896, 6.8573, 7.0226, 7.1857, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.5615, 7.7150, 7.8667, 7.7426, 7.8928,\n 8.0413, 8.1881, 8.0667, 8.2121, 8.3560, 8.4984, 8.3795, 8.5206,\n 8.6603, 8.7986, 8.6820, 8.8192, 8.9550, 9.0896, 8.9753, 9.1088,\n 9.2410, 9.3721, 9.2600, 9.3901, 9.5191, 9.6470, 9.5368, 9.6638,\n 9.7897, 9.9146, 9.8064, 9.9304, 10.0535, 10.1756, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.3257, 10.4444, 10.5623, 10.6793, 10.5763, 10.6927,\n 10.8082, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.0615, 11.1734,\n 11.2846, 11.3950, 11.2966, 11.4065, 11.5157, 11.6242, 11.5271, 11.6351,\n 11.7424, 11.8491, 11.7533, 11.8594, 11.9650, 12.0699, 11.9754, 12.0798,\n 12.1836, 12.2868, 12.1936, 12.2963, 12.3985, 12.5001, 12.4081, 12.5093,\n 12.6099, 12.7100, 12.6190, 12.7187, 12.8179, 12.9165, 12.8267, 12.9249,\n 13.0226, 13.1198, 13.0311, 13.1279, 13.2243, 13.3201, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.4308, 13.5250, 13.6188, 13.7122, 13.6264, 13.7194,\n 13.8120, 13.9042, 13.8193, 13.9111, 14.0025, 14.0936, 14.0096, 14.1003,\n 14.1906, 14.2805, 14.1974, 14.2870, 14.3762, 14.4651, 14.3828, 14.4714,\n 14.5595, 14.6473, 14.5659, 14.6534, 14.7406, 14.8274, 14.7468, 14.8333,\n 14.9195, 15.0054, 14.9255, 15.0111, 15.0964, 15.1813, 15.1022])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I couldn't put the pot on the shelf because it was too high.\nWith pronoun replaced: The pot was too high.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "43", + "Fraction of T in Greenlist": "21.6%", + "z-score": "-1.11", + "p value": "0.865", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.2722, -0.3974, -0.5164, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -0.9744, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.9847,\n -1.0404, -1.0954, -1.1499, -0.9631, -1.0178, -1.0719, -0.8885, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.0412, -0.8729,\n -0.9233, -0.9733, -1.0229, -1.0721, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.0370, -1.0844, -1.1316, -0.9734, -0.8165,\n -0.8642, -0.9115, -0.9584, -0.8040, -0.8511, -0.8978, -0.9441, -0.9901,\n -1.0359, -1.0812, -1.1263, -1.1711, -1.2155, -1.2597, -1.1105, -0.9623,\n -1.0069, -1.0512, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.1094, -1.1519, -1.1942, -1.0531, -0.9129,\n -0.9555, -0.9979, -1.0401, -0.9017, -0.9439, -0.9858, -1.0276, -1.0690,\n -1.1103, -1.1513, -1.1921, -1.2326, -1.2730, -1.3131, -1.1784, -1.0445,\n -1.0849, -1.1251, -1.1651, -1.2049, -1.0729, -1.1127, -1.1523, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.1790, -1.2179, -1.2566, -1.1279, -1.0000,\n -1.0390, -1.0777, -1.1163, -0.9897, -1.0284, -1.0668, -1.1050])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "107", + "Fraction of T in Greenlist": "53.8%", + "z-score": "9.37", + "p value": "3.55e-21", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.9593, 2.6667, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 2.9938, 3.3113, 3.6148, 3.4017, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.7556, 4.5556, 4.3644, 4.6101, 4.8488, 5.0811, 5.3072,\n 5.5277, 5.3468, 5.1723, 5.3886, 5.2204, 5.0576, 5.2697, 5.4772,\n 5.6805, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 6.1477, 6.0000,\n 6.1859, 6.0421, 6.2251, 6.0849, 5.9479, 5.8140, 5.9944, 6.1721,\n 6.0413, 6.2164, 6.0883, 6.2610, 6.4312, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.9204, 7.0763,\n 6.9601, 7.1143, 7.0000, 7.1525, 7.3033, 7.4524, 7.3401, 7.4878,\n 7.3773, 7.5234, 7.6681, 7.5593, 7.4521, 7.5954, 7.7373, 7.6317,\n 7.5275, 7.4247, 7.5653, 7.4639, 7.3638, 7.2650, 7.1674, 7.3068,\n 7.4449, 7.3485, 7.4853, 7.3901, 7.5258, 7.6603, 7.5661, 7.6995,\n 7.8318, 7.7387, 7.8699, 8.0000, 8.1291, 8.0370, 7.9460, 8.0741,\n 8.2012, 8.1111, 8.2372, 8.1481, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.3453, 8.2588, 8.3813, 8.2956, 8.2107, 8.1266, 8.0434, 7.9608,\n 8.0824, 8.2032, 8.1214, 8.2413, 8.1602, 8.2793, 8.3977, 8.3172,\n 8.4348, 8.5516, 8.4718, 8.5879, 8.7033, 8.6241, 8.5456, 8.4678,\n 8.3906, 8.5052, 8.4286, 8.3526, 8.2772, 8.2024, 8.3162, 8.4293,\n 8.5417, 8.6535, 8.5792, 8.5054, 8.6165, 8.7270, 8.6537, 8.5810,\n 8.5088, 8.6186, 8.5469, 8.4757, 8.4050, 8.3349, 8.4439, 8.5524,\n 8.4826, 8.4133, 8.3446, 8.2762, 8.2084, 8.3161, 8.2486, 8.3557,\n 8.4623, 8.3952, 8.3286, 8.4345, 8.5399, 8.4736, 8.5785, 8.6828,\n 8.7867, 8.8900, 8.8240, 8.7584, 8.8612, 8.9635, 9.0653, 9.0000,\n 9.1013, 9.2022, 9.3026, 9.2376, 9.1730, 9.2729, 9.3723])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The police arrested all of the gang members. They were trying to run the drug trade in the neighborhood.\nWith pronoun replaced: The police were trying to run the drug trade in the neighborhood.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.4434,\n -1.5038, -1.5635, -1.6223, -1.6803, -1.4596, -1.2421, -1.0278, -1.0887,\n -0.8785, -0.6712, -0.7333, -0.7947, -0.8553, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -0.9847,\n -0.7956, -0.8520, -0.9078, -0.9631, -1.0178, -1.0719, -1.1255, -1.1785,\n -1.2310, -1.2831, -1.3346, -1.3856, -1.4362, -1.4863, -1.5360, -1.5852,\n -1.4086, -1.2337, -1.2837, -1.3333, -1.3825, -1.4313, -1.2604, -1.3093,\n -1.3578, -1.4059, -1.4536, -1.5010, -1.3344, -1.3819, -1.4290, -1.4757,\n -1.5221, -1.5681, -1.4056, -1.4517, -1.4976, -1.5430, -1.5882, -1.6330,\n -1.6775, -1.5191, -1.5637, -1.6081, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.4744, -1.5181, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -2.0203, -2.0605,\n -2.1004, -1.9540, -1.9941, -2.0339, -2.0735, -2.1128, -1.9688, -2.0083,\n -2.0476, -2.0866, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.3538, -2.3912, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.2630,\n -2.1264, -2.1637, -2.2008, -2.2377, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.2497, -2.2860, -2.3221, -2.1896, -2.2258, -2.2618, -2.2977, -2.3333,\n -2.2026, -2.2384, -2.1086, -1.9795, -2.0156, -2.0515, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "140", + "Fraction of T in Greenlist": "70.4%", + "z-score": "14.8", + "p value": "1.07e-49", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.5164, 0.3780, 0.7385, 0.6019, 0.9428,\n 1.2702, 1.5852, 1.4444, 1.7457, 2.0370, 2.3190, 2.5924, 2.8577,\n 3.1156, 2.9704, 3.2205, 3.4641, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.3853, 3.6098, 3.8297, 4.0451, 3.9158, 3.7897, 3.6667,\n 3.8765, 4.0825, 4.2848, 4.1633, 4.3618, 4.5569, 4.4374, 4.3205,\n 4.2060, 4.0937, 3.9837, 4.1740, 4.0657, 4.2528, 4.4371, 4.3301,\n 4.5115, 4.6904, 4.8669, 4.7610, 4.9348, 5.1065, 5.0019, 4.8990,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.0679, 5.2281, 5.3867, 5.5435, 5.6986, 5.6032, 5.5090, 5.6622,\n 5.5691, 5.7207, 5.8707, 6.0193, 6.1664, 6.3122, 6.4566, 6.3640,\n 6.5069, 6.6486, 6.7890, 6.9282, 7.0662, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.5143, 7.6466, 7.7778, 7.9079, 7.8168, 7.9460, 8.0741,\n 8.2012, 8.3274, 8.4526, 8.5769, 8.4868, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.7356, 9.8494, 9.9625, 10.0748, 10.1865, 10.2975,\n 10.4079, 10.3209, 10.4307, 10.5397, 10.6481, 10.7559, 10.8631, 10.9697,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.4080, 11.5111, 11.4263,\n 11.5290, 11.6311, 11.7326, 11.8336, 11.9341, 12.0341, 11.9504, 12.0499,\n 12.1489, 12.2474, 12.3455, 12.4430, 12.5401, 12.4575, 12.5542, 12.6504,\n 12.7461, 12.8414, 12.9363, 13.0307, 12.9491, 13.0431, 13.1367, 13.2299,\n 13.3227, 13.4150, 13.5069, 13.4263, 13.5179, 13.6091, 13.6999, 13.7904,\n 13.8804, 13.9700, 13.8904, 13.9797, 14.0687, 14.1573, 14.2455, 14.3333,\n 14.4208, 14.3422, 14.4294, 14.5162, 14.6027, 14.6889, 14.7747])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The cat was lying by the mouse hole waiting for the mouse, but it was too cautious.\nWith pronoun replaced: The mouse was too cautious.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "44", + "Fraction of T in Greenlist": "22.1%", + "z-score": "-0.941", + "p value": "0.827", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -0.8115, -0.5345, -0.6163, -0.6963, -0.4303, -0.5108, -0.5895, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.3824, -0.4549, -0.5262, -0.5963, -0.6653, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.6901, -0.4796, -0.5443,\n -0.6082, -0.4027, -0.4667, -0.5298, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.5774, -0.6376, -0.6971, -0.7559, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.3060, -0.3651, -0.4237, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.2872, -0.3430, -0.3982, -0.2265,\n -0.2817, -0.3365, -0.1674, -0.2222, -0.2765, -0.3303, -0.3836, -0.4364,\n -0.4888, -0.5407, -0.3769, -0.4288, -0.4804, -0.5315, -0.3705, -0.4216,\n -0.4724, -0.3136, -0.3644, -0.4148, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.6608, -0.5064, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.4439, -0.4915, -0.5387, -0.5855, -0.6321, -0.6783, -0.7242, -0.7698,\n -0.6233, -0.6689, -0.7143, -0.7593, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.6068, -0.6513, -0.6956, -0.7396, -0.7833, -0.8268, -0.8700, -0.7303,\n -0.7735, -0.8165, -0.8592, -0.7213, -0.7641, -0.8066, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.9218, -0.9629, -0.8292, -0.8704,\n -0.9113, -0.9520, -0.8199, -0.8607, -0.9012, -0.7703, -0.8109, -0.8513,\n -0.8914, -0.9313, -0.9711, -1.0106, -1.0499, -0.9215, -0.9608, -1.0000,\n -1.0390, -0.9119, -0.9509, -0.9897, -0.8638, -0.9027, -0.9413])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "101", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.39", + "p value": "2.43e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.4910, 2.3333, 2.1822, 2.4659, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 3.7417, 3.6098, 3.4816, 3.7009, 3.5753, 3.4528, 3.6667,\n 3.5466, 3.7559, 3.6380, 3.5228, 3.7273, 3.9284, 4.1260, 4.3205,\n 4.2060, 4.0937, 3.9837, 3.8759, 4.0657, 4.2528, 4.4371, 4.3301,\n 4.2251, 4.1219, 4.0205, 3.9208, 4.1008, 4.0024, 3.9056, 4.0825,\n 3.9869, 4.1612, 4.0667, 3.9736, 4.1451, 4.3146, 4.4820, 4.6476,\n 4.5547, 4.4630, 4.3727, 4.2836, 4.4462, 4.6070, 4.7662, 4.6775,\n 4.8347, 4.9904, 5.1444, 5.0562, 5.2086, 5.3594, 5.5088, 5.4212,\n 5.3345, 5.2489, 5.1643, 5.0807, 5.2278, 5.1450, 5.0630, 5.2085,\n 5.1273, 5.2713, 5.1908, 5.1111, 5.2535, 5.3947, 5.5348, 5.6737,\n 5.5942, 5.5155, 5.4377, 5.3606, 5.4977, 5.6338, 5.7689, 5.6921,\n 5.8260, 5.9589, 6.0908, 6.0143, 6.1451, 6.2750, 6.4039, 6.3278,\n 6.4558, 6.5828, 6.7090, 6.6332, 6.5582, 6.4838, 6.4101, 6.3369,\n 6.4618, 6.3892, 6.3172, 6.4409, 6.3694, 6.4923, 6.4213, 6.3509,\n 6.4728, 6.5939, 6.7143, 6.8339, 6.7637, 6.6939, 6.6248, 6.5561,\n 6.6747, 6.7925, 6.9097, 6.8413, 6.9577, 7.0735, 7.1885, 7.1204,\n 7.2348, 7.3485, 7.4616, 7.3937, 7.3263, 7.2594, 7.1929, 7.1270,\n 7.2391, 7.1735, 7.1083, 7.2197, 7.1549, 7.2656, 7.2012, 7.1372,\n 7.2472, 7.3566, 7.4655, 7.5738, 7.5100, 7.4465, 7.3835, 7.3208,\n 7.4283, 7.5353, 7.6418, 7.5794, 7.6853, 7.7907, 7.8956, 7.8333,\n 7.9377, 8.0416, 8.1449, 8.0829, 8.1858, 8.2882, 8.3901])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam tried to paint a picture of shepherds with sheep, but they ended up looking more like golfers.\nWith pronoun replaced: The sheep ended up looking more like golfers.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -1.5430, -1.0435, -1.1547,\n -1.2603, -1.3608, -1.4570, -1.5492, -1.6378, -1.7233, -1.8058, -1.4142,\n -1.5011, -1.1323, -0.7778, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -0.6963, -0.7746, -0.8513, -0.5895, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.1342, 0.0667, 0.0000, -0.0658, -0.1307, 0.0650, 0.0000,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.0000,\n -0.0612, -0.1217, -0.1816, 0.0000, 0.1796, 0.1191, 0.0592, 0.2357,\n 0.1759, 0.3499, 0.5222, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.3944, 0.5608, 0.5023, 0.4444, 0.3871, 0.3303, 0.2740, 0.4364,\n 0.3802, 0.3244, 0.2692, 0.2144, 0.1601, 0.3189, 0.4763, 0.4216,\n 0.3674, 0.5227, 0.4685, 0.6222, 0.7746, 0.9258, 0.8709, 0.8165,\n 0.7625, 0.7089, 0.6558, 0.8040, 0.7509, 0.6983, 0.6460, 0.5941,\n 0.5426, 0.6881, 0.6366, 0.5855, 0.5348, 0.4845, 0.4345, 0.5774,\n 0.7192, 0.6689, 0.6190, 0.7593, 0.7095, 0.8485, 0.9867, 1.1239,\n 1.0735, 1.0235, 0.9739, 0.9245, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.9017, 0.8540, 0.8066, 0.7595, 0.7127,\n 0.6662, 0.7971, 0.9272, 0.8805, 0.8340, 0.9629, 0.9165, 1.0445,\n 1.1717, 1.2982, 1.2514, 1.2049, 1.1587, 1.1127, 1.0670, 1.1918,\n 1.1461, 1.1007, 1.0555, 1.0106, 0.9659, 1.0890, 1.0444, 1.0000,\n 0.9558, 0.9119, 0.8682, 0.9897, 1.1106, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "197", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.8%", + "z-score": "8.35", + "p value": "3.4e-17", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.8165,\n 0.5774, 1.0954, 1.5667, 2.0000, 1.7614, 2.1602, 2.5342, 2.8868,\n 2.6605, 2.9938, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 2.8284,\n 2.6558, 2.9439, 2.7778, 2.6186, 2.4659, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.9337, 4.1603, 4.0166,\n 3.8772, 4.0980, 3.9620, 3.8297, 3.7009, 3.9158, 3.7897, 3.6667,\n 3.5466, 3.7559, 3.9614, 3.8431, 4.0446, 4.2426, 4.4374, 4.3205,\n 4.2060, 4.3970, 4.2844, 4.1740, 4.0657, 4.2528, 4.1461, 4.0415,\n 3.9386, 4.1219, 4.3026, 4.2008, 4.3788, 4.5544, 4.7278, 4.6268,\n 4.7977, 4.9666, 5.1333, 5.0332, 4.9346, 5.0990, 5.0017, 4.9058,\n 4.8113, 4.9731, 4.8797, 4.7875, 4.6967, 4.8561, 5.0138, 4.9237,\n 5.0795, 5.2338, 5.3865, 5.2970, 5.4480, 5.5976, 5.7458, 5.6569,\n 5.5690, 5.7155, 5.6285, 5.5426, 5.4576, 5.6023, 5.5181, 5.4349,\n 5.3526, 5.4956, 5.6373, 5.5556, 5.6959, 5.8351, 5.9732, 5.8919,\n 6.0287, 6.1644, 6.2991, 6.2183, 6.1382, 6.2716, 6.1923, 6.1137,\n 6.0359, 6.1680, 6.0908, 6.0143, 5.9386, 6.0693, 6.1990, 6.1237,\n 6.2524, 6.3803, 6.5072, 6.4322, 6.5582, 6.6833, 6.8076, 6.7330,\n 6.8564, 6.9789, 7.1007, 7.0265, 6.9529, 7.0737, 7.0007, 6.9282,\n 6.8563, 6.9762, 6.9048, 6.8339, 6.7637, 6.8825, 7.0006, 6.9307,\n 7.0481, 7.1647, 7.2807, 7.2111, 7.3263, 7.4409, 7.5548, 7.4855,\n 7.5988, 7.7114, 7.8233, 7.7544, 7.6859, 7.7971, 7.7291, 7.6615,\n 7.5944, 7.7048, 7.6381, 7.5719, 7.5061, 7.6158, 7.7249, 7.6594,\n 7.7679, 7.8759, 7.9833, 7.9181, 8.0249, 8.1312, 8.2370, 8.1721,\n 8.2773, 8.3820, 8.4862, 8.4215, 8.3572, 8.4608, 8.3969, 8.3333,\n 8.2702, 8.3732, 8.3103, 8.2479, 8.3503])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred covered his eyes with his hands, because the wind was blowing sand around. He opened them when the wind stopped.\nWith pronoun replaced: He opened his eyes when the wind stopped.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "40", + "Fraction of T in Greenlist": "20.1%", + "z-score": "-1.6", + "p value": "0.945", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.7385, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -0.8165,\n -0.9045, -0.9901, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.0954,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.1189, -0.8513, -0.9264, -0.6667,\n -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.9610, -1.0265, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.1185, -1.1767, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -0.7559, -0.8141, -0.8716, -0.6809, -0.7385,\n -0.7956, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.8003, -0.8533, -0.9058,\n -0.9578, -1.0094, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.1406, -1.1896, -1.2383, -1.2865, -1.3344, -1.3819, -1.4290, -1.2649,\n -1.3122, -1.3590, -1.1973, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.3620, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.4305, -1.4744, -1.3222, -1.3663, -1.4100, -1.4535, -1.3036, -1.1547,\n -1.1987, -1.2423, -1.0952, -1.1390, -0.9933, -1.0371, -1.0806, -0.9366,\n -0.9802, -1.0235, -1.0666, -1.1094, -1.1519, -1.1942, -1.0531, -1.0954,\n -1.1375, -1.1794, -1.2210, -1.2623, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.6057, -1.6444, -1.6830, -1.5492, -1.5878, -1.6262, -1.4938, -1.3620,\n -1.4008, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.7410, -1.7778, -1.8145, -1.6865, -1.7233, -1.5962])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "100", + "Fraction of T in Greenlist": "50.3%", + "z-score": "8.23", + "p value": "9.65e-17", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.1602, 1.9379, 2.3094, 2.1004, 1.9052,\n 1.7219, 1.5492, 1.3859, 1.7233, 1.5650, 1.4142, 1.2702, 1.1323, 1.0000,\n 0.8729, 1.1793, 1.0541, 0.9333, 1.2247, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.0441, 0.9366, 0.8321, 0.7303, 0.6312, 0.5345, 0.4402, 0.3482, 0.2582,\n 0.5108, 0.4211, 0.6667, 0.5774, 0.4899, 0.4042, 0.6405, 0.8724, 0.7857,\n 0.7006, 0.9258, 0.8412, 1.0613, 1.2778, 1.1926, 1.4045, 1.3198, 1.5275,\n 1.4434, 1.6471, 1.8477, 2.0455, 2.2404, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.0948, 2.0135, 2.2000, 2.1193, 2.3028, 2.4841, 2.6632, 2.8402, 2.7585,\n 2.9329, 3.1052, 3.0237, 2.9433, 3.1129, 3.2806, 3.2004, 3.3659, 3.5298,\n 3.6919, 3.6116, 3.7717, 3.6920, 3.8503, 3.7712, 3.9276, 4.0825, 4.2359,\n 4.3879, 4.5384, 4.4590, 4.3804, 4.3027, 4.2258, 4.1497, 4.2977, 4.2222,\n 4.3687, 4.5140, 4.6580, 4.8008, 4.7252, 4.8666, 5.0070, 4.9317, 4.8572,\n 4.9960, 5.1338, 5.0596, 5.1962, 5.3316, 5.4661, 5.3921, 5.5255, 5.4521,\n 5.5842, 5.5114, 5.6424, 5.7726, 5.9019, 6.0302, 6.1577, 6.0848, 6.0125,\n 5.9409, 5.8698, 5.7994, 5.9254, 5.8554, 5.9805, 6.1047, 6.2282, 6.3509,\n 6.2810, 6.4028, 6.5238, 6.4543, 6.3853, 6.5054, 6.6248, 6.5561, 6.6747,\n 6.6064, 6.7242, 6.6564, 6.7734, 6.8897, 7.0054, 7.1204, 7.2348, 7.1670,\n 7.0998, 7.0330, 6.9667, 6.9009, 7.0142, 6.9488, 7.0614, 7.1735, 7.2849,\n 7.3958, 7.3305, 7.4407, 7.5503, 7.4853, 7.4208, 7.5297, 7.6381, 7.5738,\n 7.6816, 7.6177, 7.7249, 7.6613, 7.7679, 7.8740, 7.9796, 8.0847, 8.1892,\n 8.1258, 8.0627, 8.0000, 7.9377, 7.8758, 7.9796, 7.9179, 8.0212, 8.1240,\n 8.2264])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: When they had eventually calmed down a bit, and had gotten home, Mr. Farley put the magic pebble in an iron safe. Some day they might want to use it , but really for now, what more could they wish for?\nWith pronoun replaced: Some day they might want to use the safe.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "52", + "Fraction of T in Greenlist": "26.1%", + "z-score": "0.368", + "p value": "0.356", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.4714,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, 0.2108, 0.1037, 0.4082,\n 0.7035, 0.9901, 0.8783, 0.7698, 0.6644, 0.5620, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.8513, 0.7579, 0.6667,\n 0.9073, 0.8165, 0.7276, 0.6405, 0.5551, 0.4714, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.2981, 0.2218, 0.1466, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.0705, 0.0000, -0.0695, -0.1380, 0.0685, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.4547, -0.5164,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.4815, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.6383, -0.6928, -0.7468, -0.5717, -0.3982, -0.4529,\n -0.5071, -0.3365, -0.3907, -0.2222, -0.2765, -0.1101, 0.0548, 0.2182,\n 0.1629, 0.1081, 0.0538, 0.0000, 0.1601, 0.1063, 0.0529, 0.0000,\n -0.0525, 0.1045, 0.0521, 0.2074, 0.1549, 0.1029, 0.2562, 0.2041,\n 0.1525, 0.1013, 0.0504, 0.0000, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.0493, -0.0983, -0.1469, -0.1952, -0.2431, -0.0969, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.1898, -0.2365, -0.2828, -0.3289, -0.3746,\n -0.4201, -0.4652, -0.3246, -0.3698, -0.4147, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, 0.0000, -0.0449, 0.0896, 0.2234, 0.3563,\n 0.3109, 0.2657, 0.2208, 0.1761, 0.3073, 0.2626, 0.2182, 0.1741,\n 0.1302, 0.2596, 0.2158, 0.3443, 0.3004, 0.2568, 0.3841, 0.3405,\n 0.2971, 0.2540, 0.2111, 0.1684, 0.1260, 0.0838, 0.0418, 0.0000,\n -0.0416, -0.0829, -0.1240, 0.0000, 0.1234, 0.2462, 0.3683])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "16", + "# Tokens in Greenlist": "12", + "Fraction of T in Greenlist": "75.0%", + "z-score": "4.62", + "p value": "1.93e-06", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 3.6556, 4.0000, 4.3235, 4.6291, 4.3231, 4.6188])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The cat was lying by the mouse hole waiting for the mouse, but it was too cautious.\nWith pronoun replaced: The cat was too cautious.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "21.1%", + "z-score": "-1.27", + "p value": "0.898", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.8704, 1.3333, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 0.8819, 1.2309, 1.0835, 0.9428,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 0.7035, 0.5941, 0.4880, 0.7698, 0.6644, 0.5620, 0.4623, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, -0.1633, 0.0808, 0.0000, -0.0793, -0.1571, 0.0778, 0.0000,\n -0.0765, -0.1516, -0.2255, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, 0.0705, 0.0000, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, -0.1342, -0.2000, -0.2649, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.1901, -0.2520, -0.3131, -0.3735, -0.4333, -0.4924,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.5388, -0.5955, -0.6516, -0.7071,\n -0.7621, -0.8165, -0.8704, -0.6928, -0.7468, -0.8003, -0.8533, -0.6794,\n -0.7325, -0.7851, -0.8372, -0.8889, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -1.0721, -1.1209, -1.1693, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -1.0844, -1.1316, -1.1783, -1.2247,\n -1.2708, -1.3166, -1.1602, -1.0050, -1.0513, -1.0973, -1.1429, -1.1882,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.2155, -1.2597, -1.3036, -1.3472,\n -1.1987, -1.2423, -1.2857, -1.3288, -1.3717, -1.4142, -1.2686, -1.1239,\n -1.1669, -1.2096, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.7997, -1.8383, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.5492, -1.5878, -1.4551, -1.4938, -1.5323,\n -1.5706, -1.4393, -1.4777, -1.5159, -1.5539, -1.5916, -1.4621, -1.5000,\n -1.5377, -1.5752, -1.4471, -1.3197, -1.3574, -1.2309, -1.2687])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "119", + "Fraction of T in Greenlist": "59.8%", + "z-score": "11.3", + "p value": "4.31e-30", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 2.5560, 2.9593, 3.3333, 3.0424, 3.3947, 3.7268, 4.0415,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.4096, 4.1851, 3.9727, 3.7712,\n 3.5796, 3.8497, 4.1111, 3.9279, 4.1812, 4.0056, 4.2515, 4.0825,\n 3.9196, 4.1586, 4.3916, 4.6188, 4.8407, 4.6829, 4.8999, 5.1121,\n 5.3199, 5.1671, 5.3708, 5.5705, 5.7664, 5.6183, 5.4740, 5.3333,\n 5.1962, 5.0623, 5.2549, 5.4444, 5.3134, 5.4997, 5.6830, 5.8635,\n 5.7354, 5.9132, 6.0883, 6.2610, 6.1355, 6.3058, 6.4738, 6.6395,\n 6.5166, 6.6803, 6.8419, 7.0014, 6.8810, 6.7626, 6.6463, 6.5320,\n 6.4195, 6.5773, 6.7333, 6.6227, 6.7769, 6.6679, 6.8205, 6.7132,\n 6.6075, 6.7583, 6.9076, 7.0553, 7.2016, 7.0973, 7.2421, 7.3855,\n 7.5275, 7.4247, 7.5653, 7.7047, 7.8428, 7.7414, 7.8782, 8.0139,\n 8.1483, 8.0483, 7.9495, 7.8520, 7.7555, 7.6603, 7.7937, 7.9259,\n 7.8318, 7.9630, 8.0931, 8.2222, 8.1291, 8.2572, 8.3843, 8.5105,\n 8.4184, 8.5437, 8.6679, 8.7913, 8.7003, 8.8228, 8.9444, 9.0652,\n 8.9752, 8.8860, 8.7978, 8.7104, 8.6238, 8.7439, 8.8631, 8.7773,\n 8.8958, 9.0134, 9.1302, 9.0453, 9.1615, 9.2768, 9.3915, 9.3074,\n 9.4213, 9.5346, 9.6471, 9.5638, 9.6757, 9.7869, 9.8975, 9.8150,\n 9.7331, 9.6519, 9.5714, 9.4916, 9.6016, 9.7109, 9.6317, 9.7405,\n 9.8486, 9.9562, 9.8776, 9.9846, 10.0910, 10.1968, 10.1189, 10.2242,\n 10.3289, 10.4330, 10.3557, 10.4594, 10.5625, 10.6650, 10.5884, 10.5123,\n 10.4367, 10.3617, 10.2872, 10.3893, 10.4909, 10.4170, 10.5181, 10.6187,\n 10.7189, 10.6455, 10.7451, 10.8444, 10.9431, 10.8702, 10.9685, 11.0663,\n 11.1637, 11.0913, 11.0194, 10.9480, 10.8770, 10.8064, 10.9034, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.2424, 11.3369])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: By rolling over in her upper berth, Tatyana could look over the edge of it and see her mother plainly. How very small and straight and rigid she lay in the bunk below! Her eyes were closed, but Tatyana doubted if she slept.\nWith pronoun replaced: How very small and straight and rigid her mother lay in the bunk below!\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "47", + "Fraction of T in Greenlist": "23.6%", + "z-score": "-0.45", + "p value": "0.674", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -0.2582, -0.4714, 0.2182, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, 0.3780, 0.7385, 0.6019, 0.4714,\n 0.8083, 1.1323, 1.4444, 1.3093, 1.1793, 1.0541, 0.9333, 0.8165,\n 1.1055, 1.3862, 1.2687, 1.1547, 1.0441, 0.9366, 0.8321, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.3482, 0.2582, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.3824, -0.1516, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.5601, -0.6255, -0.6901, -0.4796, -0.5443,\n -0.3379, -0.1342, -0.2000, -0.2649, -0.3290, -0.3922, -0.1949, -0.2582,\n -0.3208, -0.3825, -0.4436, -0.5040, -0.5636, -0.6226, -0.6809, -0.4924,\n -0.5508, -0.3651, -0.1816, -0.2408, -0.2993, -0.3573, -0.4146, -0.4714,\n -0.5276, -0.5832, -0.6383, -0.4619, -0.5170, -0.3430, -0.3982, -0.4529,\n -0.5071, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.9074, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -0.9891, -1.0370, -0.8779, -0.9258, -0.9734, -1.0206,\n -1.0675, -1.1140, -0.9584, -0.8040, -0.8511, -0.8978, -0.7454, -0.5941,\n -0.6412, -0.6881, -0.5387, -0.3904, -0.2431, -0.2907, -0.3380, -0.3849,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.4257, -0.4714, -0.5168, -0.5620,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.7550, -0.7971, -0.8389, -0.8805, -0.7462, -0.6128, -0.6547, -0.5222,\n -0.3906, -0.2596, -0.1295, -0.1721, -0.2146, -0.2568, -0.2988, -0.3405,\n -0.2122, -0.0847, -0.1267, -0.1684, -0.2100, -0.0838, -0.1253, -0.1667,\n -0.2078, -0.2487, -0.2894, -0.3299, -0.3702, -0.4103, -0.4502])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "138", + "Fraction of T in Greenlist": "69.3%", + "z-score": "14.4", + "p value": "1.3e-47", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 1.1547, 1.8074, 2.3570, 1.9640, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.7808, 4.0825, 4.3710, 4.6476, 4.9135, 5.1698, 4.9358, 5.1855,\n 4.9652, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 5.7155,\n 5.5277, 5.3468, 5.1723, 5.0037, 4.8407, 5.0576, 4.8999, 5.1121,\n 4.9592, 4.8107, 5.0186, 4.8742, 4.7336, 4.5968, 4.8003, 5.0000,\n 4.8662, 4.7357, 4.9316, 5.1241, 4.9962, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 5.8398, 6.0125, 5.8919, 6.0622,\n 6.2302, 6.3960, 6.5597, 6.7213, 6.6030, 6.4866, 6.3723, 6.5320,\n 6.4195, 6.3089, 6.2000, 6.3578, 6.2505, 6.4065, 6.3008, 6.1968,\n 6.3509, 6.2483, 6.1471, 6.0474, 6.1996, 6.3502, 6.2517, 6.1546,\n 6.3035, 6.4510, 6.3549, 6.5008, 6.4059, 6.5504, 6.6935, 6.8354,\n 6.9759, 7.1152, 7.0211, 7.1591, 7.0662, 7.2029, 7.3386, 7.4730,\n 7.6064, 7.7387, 7.6466, 7.7778, 7.9079, 8.0370, 8.1651, 8.2923,\n 8.4184, 8.5437, 8.6679, 8.5769, 8.7003, 8.6102, 8.7327, 8.8544,\n 8.9752, 9.0951, 9.2143, 9.1252, 9.2435, 9.3611, 9.4778, 9.5938,\n 9.7091, 9.8236, 9.9373, 10.0504, 9.9625, 10.0748, 9.9878, 10.0995,\n 10.2106, 10.3209, 10.4307, 10.5397, 10.4537, 10.5621, 10.6700, 10.7772,\n 10.8838, 10.9898, 11.0952, 11.2001, 11.3043, 11.2194, 11.3232, 11.2390,\n 11.3423, 11.4450, 11.5471, 11.6487, 11.7498, 11.6666, 11.7672, 11.8673,\n 11.9669, 12.0660, 12.1646, 12.2627, 12.3603, 12.4575, 12.3754, 12.4722,\n 12.3908, 12.4872, 12.5831, 12.6785, 12.7735, 12.8680, 12.7876, 12.8817,\n 12.9755, 13.0688, 13.1617, 13.2542, 13.3463, 13.4380, 13.5292, 13.4499,\n 13.5408, 13.4620, 13.5526, 13.6429, 13.7327, 13.8222, 13.9113, 13.8333,\n 13.9221, 14.0106, 14.0986, 14.1863, 14.2737, 14.3607, 14.4473])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred was supposed to run the dishwasher, but he put it off, because he wanted to watch TV. But the show turned out to be boring, so he changed his mind and turned it off.\nWith pronoun replaced: He changed his mind and turned the dishwasher off.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "48", + "Fraction of T in Greenlist": "24.1%", + "z-score": "-0.286", + "p value": "0.613", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, 0.1741, 0.6667, 1.1209, 1.5430, 1.3416, 1.1547,\n 0.9802, 0.8165, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, 0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.0949, -0.1873, -0.2774, 0.0000,\n 0.2705, 0.1782, 0.0880, 0.0000, 0.2582, 0.1703, 0.0842, 0.0000,\n -0.0825, 0.1633, 0.0808, 0.0000, -0.0793, -0.1571, -0.2335, -0.3086,\n -0.0765, -0.1516, 0.0752, 0.0000, 0.2218, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.3475, -0.1380, 0.0685, 0.2722,\n 0.4730, 0.4027, 0.3333, 0.2649, 0.1974, 0.1307, 0.0650, 0.0000,\n -0.0642, -0.1275, -0.1901, -0.2520, -0.0626, -0.1245, -0.1857, -0.2462,\n -0.3060, -0.3651, -0.4237, -0.4815, -0.2993, -0.3573, -0.4146, -0.2357,\n -0.2931, -0.3499, -0.1741, 0.0000, -0.0574, -0.1143, -0.1707, 0.0000,\n 0.1690, 0.1122, 0.0558, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.2716, -0.1081, 0.0538, 0.2144, 0.3736, 0.3189, 0.2646, 0.2108,\n 0.1575, 0.1045, 0.0521, 0.0000, -0.0516, -0.1029, -0.1537, -0.2041,\n -0.0508, -0.1013, -0.1513, -0.2010, -0.2503, -0.2993, -0.3478, -0.3961,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.1448, 0.0000,\n -0.0479, -0.0956, -0.1429, -0.1898, -0.2365, -0.0943, -0.1410, -0.1873,\n -0.2334, -0.0930, -0.1391, -0.1849, -0.2304, -0.2756, -0.1374, -0.1826,\n -0.2275, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.3482,\n -0.3906, -0.4327, -0.4747, -0.5164, -0.3862, -0.4280, -0.4695, -0.3405,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.3780, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.4548, -0.4949, -0.3702, -0.4103, -0.2865])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 3.0000, 3.4641, 3.8730, 4.2426, 3.7097, 4.0825,\n 4.4264, 4.0166, 4.3519, 4.0000, 3.6829, 3.3947, 3.1305, 2.8868,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.9056, 3.6927, 3.9727, 4.2426,\n 4.5033, 4.3027, 4.1111, 3.9279, 4.1812, 4.4272, 4.6663, 4.8990,\n 4.7237, 4.9507, 5.1723, 5.0037, 5.2204, 5.4322, 5.6395, 5.4772,\n 5.3199, 5.5234, 5.7229, 5.5705, 5.7664, 5.9588, 5.8108, 6.0000,\n 6.1859, 6.3687, 6.2251, 6.0849, 6.2651, 6.4425, 6.3058, 6.4807,\n 6.6531, 6.5196, 6.6896, 6.8573, 7.0226, 6.8924, 6.7648, 6.9282,\n 7.0895, 6.9646, 7.1240, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.5007, 7.3827, 7.5333, 7.6823, 7.5664, 7.7139, 7.8598, 7.7460,\n 7.8905, 8.0335, 8.1750, 8.0632, 7.9530, 8.0934, 8.2325, 8.1240,\n 8.2619, 8.3984, 8.2916, 8.4270, 8.5612, 8.6942, 8.5891, 8.4853,\n 8.6173, 8.7482, 8.6459, 8.7757, 8.9045, 8.8036, 8.9314, 9.0582,\n 9.1840, 9.0845, 8.9861, 9.1111, 9.2351, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.6490, 9.5543, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.8632, 9.9783, 10.0926, 10.0021,\n 9.9124, 10.0261, 10.1391, 10.0504, 10.1627, 10.2743, 10.1865, 10.2975,\n 10.4079, 10.5175, 10.4307, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.6076, 10.7143, 10.8204, 10.9259, 10.8423, 10.7594, 10.8644,\n 10.9689, 10.8867, 10.9906, 11.0940, 11.0125, 11.1154, 11.2178, 11.3196,\n 11.2389, 11.1588, 11.2602, 11.3610, 11.2816, 11.3820, 11.4819, 11.4031,\n 11.5026, 11.6016, 11.7000, 11.6220, 11.5444, 11.6425, 11.7401, 11.6632,\n 11.7604, 11.8571, 11.7808, 11.8771, 11.9730, 12.0685, 11.9928, 11.9176,\n 12.0127, 12.1073, 12.0327, 12.1270, 12.2209, 12.1468, 12.2403, 12.3333,\n 12.4260, 12.3525, 12.2794, 12.3718, 12.4638, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam's drawing was hung just above Tina's and it did look much better with another one below it.\nWith pronoun replaced: Tina's drawing did look much better with another one below it.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 2.3570, 2.8368, 3.2660,\n 3.6566, 3.2863, 2.9593, 2.6667, 2.4019, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.0656, 1.8898, 2.2156, 2.5281, 2.8284,\n 3.1177, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.0072, 3.2660,\n 3.1156, 2.9704, 2.8301, 2.6943, 2.5627, 2.4351, 2.3113, 2.5560,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 1.8728, 1.7685, 2.0000,\n 1.8970, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285, 1.9462, 1.8516,\n 1.7589, 1.9711, 1.8791, 1.7889, 1.7002, 1.6131, 1.5275, 1.4434,\n 1.3606, 1.2792, 1.1991, 1.4003, 1.3206, 1.2421, 1.1648, 1.0887,\n 1.0136, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.8926, 0.8238, 0.7559, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.6732, 0.6086, 0.7868, 0.7223, 0.6586, 0.5955, 0.5331, 0.4714,\n 0.4103, 0.3499, 0.2901, 0.2309, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.2791, 0.2222, 0.1659, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, 0.0538, 0.0000, -0.0534, -0.1063, -0.1588, -0.2108,\n -0.2624, -0.1045, -0.1562, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.5064, -0.5549, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.9173, -0.9623,\n -1.0069, -0.8601, -0.9048, -0.9492, -0.9933, -1.0371, -1.0806, -1.1239,\n -1.1669, -1.2096, -1.0666, -1.1094, -1.1519, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.4656, -1.5055, -1.5453, -1.5848, -1.6241, -1.6632, -1.7021, -1.7408,\n -1.7792, -1.8175, -1.6830, -1.7213, -1.7595, -1.7974, -1.8352, -1.8728,\n -1.9101, -1.9473, -1.9843, -2.0212, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.0364, -2.0726, -2.1086, -2.1444, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "126", + "Fraction of T in Greenlist": "63.3%", + "z-score": "12.5", + "p value": "4.63e-36", + "z-score_at_T": "tensor([-0.5774, 0.8165, 1.6667, 2.3094, 2.8402, 3.2998, 3.7097, 4.0825,\n 3.6566, 3.2863, 3.6556, 3.3333, 3.0424, 3.3947, 3.7268, 3.4641,\n 3.2206, 3.5382, 3.8411, 4.1312, 4.4096, 4.6775, 4.4543, 4.2426,\n 4.5033, 4.3027, 4.5556, 4.8008, 5.0389, 5.2705, 5.0811, 4.8990,\n 4.7237, 4.5547, 4.3916, 4.6188, 4.8407, 4.6829, 4.5301, 4.7469,\n 4.9592, 5.1671, 5.0186, 5.2223, 5.4222, 5.6183, 5.4740, 5.3333,\n 5.5261, 5.3889, 5.5783, 5.7646, 5.6307, 5.4997, 5.3716, 5.5549,\n 5.7354, 5.9132, 6.0883, 5.9628, 6.1355, 6.3058, 6.1828, 6.0622,\n 6.2302, 6.3960, 6.2776, 6.1612, 6.3249, 6.4866, 6.6463, 6.5320,\n 6.6898, 6.5773, 6.7333, 6.8876, 7.0401, 7.1909, 7.0803, 6.9714,\n 7.1207, 7.2684, 7.1611, 7.3073, 7.4521, 7.5954, 7.7373, 7.8779,\n 8.0171, 8.1550, 8.2916, 8.1862, 8.0822, 7.9796, 8.1152, 8.2496,\n 8.3828, 8.5149, 8.4138, 8.3138, 8.2151, 8.3463, 8.4763, 8.3789,\n 8.5079, 8.6359, 8.7629, 8.8889, 9.0139, 9.1380, 9.2611, 9.3834,\n 9.2874, 9.4088, 9.5294, 9.4346, 9.3408, 9.4606, 9.5795, 9.6977,\n 9.6050, 9.7224, 9.8389, 9.7473, 9.6566, 9.5668, 9.6828, 9.7980,\n 9.7091, 9.8236, 9.9373, 10.0504, 9.9625, 9.8753, 9.7890, 9.9015,\n 10.0133, 10.1243, 10.2348, 10.3445, 10.4537, 10.5621, 10.4769, 10.5848,\n 10.6920, 10.7987, 10.7143, 10.8204, 10.9259, 11.0309, 11.1352, 11.0517,\n 11.1556, 11.0728, 11.1761, 11.0940, 11.1968, 11.2992, 11.2178, 11.1370,\n 11.2389, 11.3402, 11.4411, 11.5414, 11.4614, 11.3820, 11.4819, 11.5813,\n 11.5026, 11.6016, 11.5234, 11.6220, 11.7200, 11.6425, 11.5655, 11.4891,\n 11.5868, 11.5109, 11.4356, 11.5329, 11.6297, 11.7261, 11.8221, 11.7473,\n 11.6731, 11.7687, 11.8638, 11.9586, 12.0529, 12.1468, 12.2403, 12.1667,\n 12.2598, 12.3525, 12.4448, 12.3718, 12.4638, 12.3912, 12.4828])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Papa looked down at the children's faces, so puzzled and sad now. It was bad enough that they had to be denied so many things because he couldn't afford them.\nWith pronoun replaced: He couldn't afford the things.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "49", + "Fraction of T in Greenlist": "24.6%", + "z-score": "-0.123", + "p value": "0.549", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 2.0466, 2.3570,\n 2.6558, 2.4910, 2.7778, 2.6186, 2.8947, 2.7406, 2.5924, 2.4495,\n 2.3116, 2.1783, 2.0494, 2.3094, 2.1831, 2.0605, 1.9415, 1.8257,\n 1.7132, 1.9599, 1.8489, 1.7408, 1.6353, 1.5323, 1.4317, 1.3333,\n 1.2372, 1.4697, 1.3744, 1.6013, 1.8240, 2.0428, 2.2576, 2.4689,\n 2.3706, 2.2743, 2.1798, 2.0870, 2.2916, 2.4930, 2.6914, 2.5981,\n 2.7928, 2.7005, 2.8919, 2.8006, 2.7107, 2.6222, 2.5352, 2.4495,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 1.8074,\n 1.7321, 1.6577, 1.5843, 1.5119, 1.4403, 1.6187, 1.5475, 1.4771,\n 1.4076, 1.3389, 1.2710, 1.2039, 1.3770, 1.3101, 1.2439, 1.1785,\n 1.1138, 1.0498, 0.9864, 0.9238, 0.8617, 0.8003, 0.9671, 0.9058,\n 0.8452, 0.7851, 0.7256, 0.8889, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.8147, 0.7570, 0.6999, 0.6433, 0.5871, 0.5315, 0.4763, 0.4216,\n 0.3674, 0.3136, 0.2603, 0.4148, 0.5680, 0.7201, 0.6660, 0.6124,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.2993, 0.2485, 0.1980,\n 0.1480, 0.0983, 0.0490, 0.0000, -0.0486, -0.0969, 0.0483, 0.0000,\n -0.0479, 0.0956, 0.2381, 0.1898, 0.1419, 0.0943, 0.0470, 0.1873,\n 0.1400, 0.2791, 0.2319, 0.3698, 0.5069, 0.4593, 0.4121, 0.3651,\n 0.3185, 0.2722, 0.4070, 0.3607, 0.3146, 0.2689, 0.2234, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.4828, 0.4377, 0.3928, 0.5222,\n 0.4774, 0.4327, 0.3884, 0.3443, 0.3004, 0.2568, 0.2134, 0.1703,\n 0.1273, 0.0847, 0.0422, 0.0000, 0.1260, 0.2513, 0.2089, 0.1667,\n 0.1247, 0.0829, 0.0413, 0.0000, -0.0411, -0.0821, -0.1228])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "73", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "63.0%", + "z-score": "7.5", + "p value": "3.17e-14", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660, 3.6566,\n 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 4.9193, 4.6188, 4.3409, 4.0825,\n 4.3710, 4.6476, 4.9135, 4.6775, 4.9358, 4.7140, 4.9652, 4.7556, 5.0000,\n 5.2372, 5.4678, 5.2705, 5.0811, 4.8990, 4.7237, 4.9507, 5.1723, 5.0037,\n 5.2204, 5.4322, 5.2697, 5.4772, 5.6805, 5.5234, 5.7229, 5.9186, 6.1107,\n 6.2993, 6.4846, 6.6667, 6.8457, 7.0219, 6.8718, 6.7254, 6.5823, 6.4425,\n 6.6172, 6.7893, 6.9589, 6.8229, 6.9903, 6.8573, 7.0226, 6.8924, 7.0557,\n 7.2169, 7.3760, 7.5331, 7.4061, 7.2815, 7.4370, 7.5907, 7.4686, 7.6206,\n 7.5007])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Sam took French classes from Adam, because he was known to speak it fluently.\nWith pronoun replaced: Sam was known to speak it fluently.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "61", + "Fraction of T in Greenlist": "30.7%", + "z-score": "1.84", + "p value": "0.0328", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774, 0.9802, 0.8165,\n 1.1921, 1.0328, 1.3859, 1.7233, 2.0466, 1.8856, 1.7321, 2.0381, 2.3333,\n 2.6186, 2.8947, 2.7406, 2.5924, 2.4495, 2.7136, 2.5744, 2.8301, 2.6943,\n 2.9424, 3.1844, 3.4207, 3.2863, 3.1558, 3.0290, 2.9055, 2.7852, 2.6681,\n 2.5538, 2.7791, 2.6667, 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.6713,\n 2.8804, 2.7775, 2.9824, 3.1840, 3.3824, 3.2796, 3.1787, 3.0796, 2.9823,\n 2.8868, 2.7928, 2.7005, 2.8919, 2.8006, 2.9887, 2.8983, 2.8093, 2.7217,\n 2.9057, 2.8189, 3.0000, 2.9140, 3.0924, 3.2686, 3.4427, 3.3566, 3.2717,\n 3.1879, 3.1052, 3.0237, 2.9433, 2.8638, 3.0330, 2.9542, 3.1211, 3.0429,\n 2.9656, 2.8893, 3.0533, 2.9775, 3.1394, 3.0641, 3.2242, 3.3826, 3.5396,\n 3.4641, 3.3895, 3.3156, 3.2426, 3.1704, 3.0989, 3.0282, 3.1814, 3.1111,\n 3.2627, 3.1928, 3.1236, 3.0551, 3.2044, 3.1363, 3.2841, 3.2163, 3.3627,\n 3.5079, 3.6519, 3.5839, 3.5166, 3.4499, 3.3838, 3.3182, 3.2533, 3.1889,\n 3.3301, 3.2660, 3.4058, 3.3420, 3.2788, 3.4171, 3.3542, 3.2918, 3.2299,\n 3.1685, 3.1076, 3.0471, 2.9872, 2.9277, 3.0632, 3.0039, 2.9451, 2.8868,\n 2.8288, 2.7713, 2.7143, 2.6576, 2.6014, 2.5456, 2.4902, 2.4351, 2.3805,\n 2.3262, 2.4578, 2.4037, 2.5343, 2.6640, 2.6099, 2.5560, 2.5026, 2.4495,\n 2.3967, 2.5247, 2.6519, 2.5990, 2.5466, 2.4944, 2.4426, 2.3912, 2.5166,\n 2.4653, 2.4142, 2.3635, 2.3131, 2.4371, 2.3868, 2.3368, 2.2871, 2.2377,\n 2.1886, 2.1398, 2.0913, 2.0430, 1.9950, 2.1167, 2.0688, 2.1896, 2.3098,\n 2.2618, 2.2141, 2.1667, 2.1195, 2.0726, 2.0259, 1.9795, 1.9333, 1.8874,\n 1.8417])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "114", + "Fraction of T in Greenlist": "57.3%", + "z-score": "10.5", + "p value": "3.56e-26", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 1.0954, 0.8704, 1.3333, 1.7614, 1.5430, 1.3416, 1.7321,\n 2.1004, 1.9052, 1.7219, 2.0656, 2.3938, 2.7080, 3.0096, 3.2998,\n 3.5796, 3.3968, 3.2222, 3.0551, 2.8947, 3.1623, 3.4219, 3.6742,\n 3.5176, 3.7626, 4.0012, 4.2339, 4.0814, 4.3083, 4.5301, 4.3818,\n 4.5985, 4.8107, 5.0186, 4.8742, 5.0779, 4.9373, 4.8003, 4.6667,\n 4.8662, 5.0623, 5.2549, 5.1241, 5.3134, 5.4997, 5.3716, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 5.8398, 6.0125, 5.8919, 5.7735,\n 5.6573, 5.8275, 5.7133, 5.8812, 6.0469, 6.2106, 6.3723, 6.2598,\n 6.1492, 6.0404, 5.9333, 6.0928, 5.9874, 5.8835, 6.0410, 5.9386,\n 6.0943, 6.2483, 6.1471, 6.2994, 6.4501, 6.5993, 6.7469, 6.6469,\n 6.5483, 6.4510, 6.3549, 6.5008, 6.6454, 6.5504, 6.6935, 6.8354,\n 6.7414, 6.8819, 7.0211, 7.1591, 7.2960, 7.2029, 7.1110, 7.2466,\n 7.3810, 7.2900, 7.4233, 7.3333, 7.2443, 7.1563, 7.0692, 7.2012,\n 7.3322, 7.2459, 7.3758, 7.5048, 7.4193, 7.5472, 7.6742, 7.8003,\n 7.9254, 7.8406, 7.7566, 7.8808, 8.0042, 8.1266, 8.2483, 8.1650,\n 8.0824, 8.0006, 7.9196, 8.0403, 8.1602, 8.0798, 8.1989, 8.3172,\n 8.2375, 8.3550, 8.4718, 8.5879, 8.7033, 8.6241, 8.5456, 8.6603,\n 8.7742, 8.8874, 9.0000, 8.9221, 8.8448, 8.7681, 8.6921, 8.8039,\n 8.9151, 8.8396, 8.9502, 9.0601, 8.9851, 9.0944, 9.2032, 9.3113,\n 9.4188, 9.3443, 9.2704, 9.3774, 9.4837, 9.5896, 9.6948, 9.6214,\n 9.5485, 9.4761, 9.4042, 9.5089, 9.6130, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.8821, 9.9837, 10.0848, 10.0143, 9.9442, 10.0448,\n 10.1450, 10.2447, 10.3439, 10.2743, 10.2050, 10.1363, 10.0679, 10.1667,\n 10.2650, 10.1970, 10.2949, 10.3923, 10.3248, 10.4217, 10.5183])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The journalists interviewed the stars of the new movie. They were very persistent, so the interview lasted for a long time.\nWith pronoun replaced: The journalists were very persistent, so the interview lasted for a long time.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -1.2185, -1.3333, -1.4412, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.5852, -1.6667, -1.3093, -1.3937, -1.4757, -1.5554, -1.6330,\n -1.7086, -1.7823, -1.8542, -1.9245, -1.9932, -2.0605, -2.1264, -2.1909,\n -2.2542, -1.9599, -2.0250, -2.0889, -2.1517, -2.2133, -2.2738, -2.3333,\n -2.3919, -2.1229, -2.1828, -2.2418, -1.9826, -2.0428, -2.1019, -2.1602,\n -2.2177, -2.2743, -2.3301, -2.0870, -2.1437, -2.1997, -2.2549, -2.3094,\n -2.3632, -2.4163, -2.4687, -2.2404, -2.2937, -2.3462, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.1712, -2.2226, -2.2735, -2.3238,\n -2.3736, -2.1678, -1.9645, -1.7638, -1.8161, -1.8677, -1.9189, -1.7233,\n -1.7748, -1.8257, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.6499,\n -1.4655, -1.2831, -1.3346, -1.3856, -1.4362, -1.2577, -1.3084, -1.3587,\n -1.1832, -1.2337, -1.2837, -1.1111, -0.9401, -0.9909, -1.0412, -1.0911,\n -0.9233, -0.9733, -1.0229, -0.8577, -0.6939, -0.7441, -0.5822, -0.6325,\n -0.4724, -0.5227, -0.3644, -0.4148, -0.4648, -0.5143, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.1513, -0.2010, -0.0501, -0.0998, 0.0497, 0.0000,\n 0.1480, 0.0983, 0.0490, 0.0000, 0.1459, 0.0969, 0.0483, 0.1925,\n 0.3356, 0.2867, 0.4286, 0.3797, 0.5203, 0.4714, 0.6108, 0.5620,\n 0.5134, 0.4652, 0.6029, 0.5547, 0.5069, 0.6430, 0.5952, 0.5477,\n 0.5005, 0.4536, 0.4070, 0.5410, 0.4944, 0.4481, 0.4021, 0.3563,\n 0.3109, 0.4428, 0.3974, 0.3522, 0.3073, 0.2626, 0.2182, 0.3482,\n 0.4774, 0.4327, 0.5610, 0.5164, 0.4721, 0.5991, 0.5548, 0.5108,\n 0.6367, 0.7620, 0.7177, 0.6737, 0.6299, 0.7539, 0.7102, 0.6667,\n 0.7896, 0.9119, 0.8682, 0.9897, 0.9461, 1.0668, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "108", + "Fraction of T in Greenlist": "54.3%", + "z-score": "9.54", + "p value": "7.42e-22", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 1.1547, 1.8074, 1.4142, 1.9640, 1.6330, 1.3472,\n 1.0954, 0.8704, 0.6667, 0.4804, 0.3086, 0.7454, 1.1547, 1.5403, 1.3608,\n 1.1921, 1.5492, 1.8898, 2.2156, 2.5281, 2.8284, 3.1177, 2.9439, 3.2222,\n 3.0551, 3.3235, 3.5839, 3.8367, 4.0825, 3.9196, 3.7626, 4.0012, 3.8490,\n 3.7017, 3.9337, 3.7905, 3.6515, 3.5165, 3.3853, 3.6098, 3.8297, 4.0451,\n 4.2563, 4.1265, 4.0000, 4.2064, 4.4091, 4.6082, 4.8038, 4.6790, 4.8712,\n 4.7488, 4.9377, 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.4259, 5.3100,\n 5.4848, 5.3709, 5.5432, 5.4312, 5.3211, 5.2129, 5.1065, 5.2760, 5.4433,\n 5.6086, 5.7719, 5.6667, 5.5630, 5.7242, 5.8835, 6.0410, 5.9386, 6.0943,\n 6.2483, 6.1471, 6.2994, 6.1996, 6.3502, 6.4993, 6.6469, 6.7931, 6.6944,\n 6.5970, 6.7416, 6.6454, 6.7886, 6.6935, 6.5997, 6.5069, 6.4153, 6.5569,\n 6.6973, 6.8364, 6.9743, 6.8834, 6.7937, 6.9303, 7.0657, 7.2001, 7.1111,\n 7.0231, 7.1563, 7.0692, 7.2012, 7.1149, 7.2459, 7.3758, 7.5048, 7.6328,\n 7.5472, 7.4625, 7.5895, 7.5056, 7.6315, 7.5484, 7.4661, 7.3845, 7.3037,\n 7.4286, 7.5526, 7.6758, 7.7981, 7.7178, 7.6383, 7.7597, 7.8803, 8.0002,\n 7.9212, 7.8429, 7.9619, 7.8842, 8.0024, 7.9253, 8.0427, 8.1594, 8.2754,\n 8.3906, 8.3140, 8.2381, 8.3526, 8.2772, 8.3910, 8.3162, 8.2420, 8.1683,\n 8.0952, 8.2082, 8.3205, 8.4322, 8.5433, 8.4706, 8.3984, 8.5088, 8.6186,\n 8.7278, 8.6560, 8.5848, 8.6933, 8.6226, 8.7305, 8.6603, 8.7676, 8.8744,\n 8.9806, 9.0863, 9.0164, 8.9469, 9.0520, 8.9830, 9.0876, 9.0190, 8.9509,\n 8.8832, 8.8160, 8.9199, 9.0233, 9.1262, 9.2287, 9.1617, 9.0952, 9.1971,\n 9.2986, 9.3995, 9.3333, 9.2676, 9.3680, 9.3026, 9.4026, 9.3375, 9.4370,\n 9.5361])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: They broadcast an announcement, but a subway came into the station and I couldn't hear it.\nWith pronoun replaced: I couldn't hear the subway.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -0.8165, -0.9272, -1.0328, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.8729, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -0.8402, -0.6255, -0.6901, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -0.8141, -0.8716, -0.9285, -0.7385,\n -0.5508, -0.6086, -0.6658, -0.7223, -0.7783, -0.8337, -0.8885, -0.7071,\n -0.5276, -0.5832, -0.6383, -0.6928, -0.5170, -0.5717, -0.6258, -0.6794,\n -0.7325, -0.5608, -0.6140, -0.6667, -0.7189, -0.7707, -0.8220, -0.8729,\n -0.7061, -0.7570, -0.8076, -0.6433, -0.4804, -0.5315, -0.5822, -0.6325,\n -0.6823, -0.7318, -0.7809, -0.6222, -0.4648, -0.5143, -0.5635, -0.6124,\n -0.4575, -0.5064, -0.5549, -0.6030, -0.6508, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.6321, -0.6783, -0.7242, -0.5774,\n -0.4315, -0.4778, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.4201, -0.4652, -0.5101, -0.5547, -0.4147, -0.4593, -0.5037, -0.5477,\n -0.5915, -0.4536, -0.4974, -0.5410, -0.5843, -0.6274, -0.6702, -0.7127,\n -0.5774, -0.6199, -0.6623, -0.5283, -0.3951, -0.4377, -0.4801, -0.5222,\n -0.5642, -0.6058, -0.6473, -0.5164, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.3820, -0.4233, -0.4644, -0.5053, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.5347, -0.5744, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "9", + "# Tokens in Greenlist": "4", + "Fraction of T in Greenlist": "44.4%", + "z-score": "1.35", + "p value": "0.089", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 1.3472])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Fred and Alice had very warm down coats, but they were not enough for the cold in Alaska.\nWith pronoun replaced: coats were not enough for the cold in Alaska.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "109", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "32.1%", + "z-score": "1.71", + "p value": "0.0432", + "z-score_at_T": "tensor([1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774, 0.9802, 0.8165,\n 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142, 1.2702, 1.1323, 1.4444,\n 1.7457, 1.6082, 1.4757, 1.3480, 1.2247, 1.5076, 1.3862, 1.2687, 1.1547,\n 1.0441, 0.9366, 1.2019, 1.0954, 0.9918, 1.2472, 1.4968, 1.7408, 1.6353,\n 1.5323, 1.4317, 1.6667, 1.8970, 1.7963, 2.0211, 1.9215, 1.8240, 1.7285,\n 1.6348, 1.5430, 1.7589, 1.6678, 1.5785, 1.7889, 1.7002, 1.6131, 1.5275,\n 1.7321, 1.6471, 1.5635, 1.7634, 1.9604, 1.8766, 1.7942, 1.9870, 1.9052,\n 1.8245, 2.0135, 2.2000, 2.1193, 2.0397, 1.9612, 1.8838, 2.0656, 1.9887,\n 1.9127, 1.8378, 1.7638, 1.6908, 1.8677, 1.7951, 1.7233, 1.8972, 1.8257,\n 1.9973, 1.9262, 1.8559, 2.0247, 2.1917, 2.1213, 2.0517, 1.9829, 1.9149,\n 2.0785, 2.0107, 1.9437, 1.8773, 1.8116, 1.7467, 1.9066, 1.8419, 1.7778,\n 1.7143])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "101", + "# Tokens in Greenlist": "42", + "Fraction of T in Greenlist": "41.6%", + "z-score": "3.85", + "p value": "5.93e-05", + "z-score_at_T": "tensor([1.7321, 2.4495, 3.0000, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165, 0.5774,\n 1.0954, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774, 0.9802, 0.8165,\n 0.6623, 1.0328, 1.3859, 1.2309, 1.0835, 1.4142, 1.7321, 1.5852, 1.4444,\n 1.3093, 1.6082, 1.8974, 2.1776, 2.4495, 2.3116, 2.1783, 2.0494, 2.3094,\n 2.1831, 2.0605, 2.3113, 2.1909, 2.0738, 1.9599, 2.2011, 2.0889, 1.9795,\n 2.2133, 2.4422, 2.3333, 2.2269, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570,\n 2.5690, 2.7775, 2.9824, 3.1840, 3.0817, 2.9814, 2.8830, 3.0796, 2.9823,\n 2.8868, 3.0793, 2.9848, 2.8919, 2.8006, 2.9887, 2.8983, 2.8093, 2.9938,\n 3.1760, 3.0873, 3.0000, 3.1789, 3.3556, 3.2686, 3.1829, 3.0984, 3.2717,\n 3.4429, 3.6122, 3.7796, 3.6947, 3.6109, 3.5282, 3.6927, 3.6107, 3.5298,\n 3.6919, 3.6116, 3.5322, 3.4538, 3.6133, 3.5355, 3.4586, 3.6159, 3.7717,\n 3.6950, 3.8490])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The father carried the sleeping boy in his arms.\nWith pronoun replaced: The father carried the sleeping boy in the father's arms.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "56", + "Fraction of T in Greenlist": "28.1%", + "z-score": "1.02", + "p value": "0.153", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, -0.3651, -0.5222, 0.0000, -0.1601, -0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.6299, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, 0.0842, 0.0000,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.4714, 0.7006, 0.6172,\n 0.5353, 0.4549, 0.3758, 0.2981, 0.2218, 0.4399, 0.3637, 0.2887,\n 0.2148, 0.1421, 0.3527, 0.2801, 0.4865, 0.4140, 0.3426, 0.5443,\n 0.4730, 0.6712, 0.6000, 0.5298, 0.7237, 0.6537, 0.5846, 0.7746,\n 0.9623, 1.1476, 1.0773, 1.0079, 0.9393, 0.8716, 0.8047, 0.7385,\n 0.9180, 1.0954, 1.0289, 1.2039, 1.1375, 1.0719, 1.0070, 0.9428,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.6794,\n 0.6198, 0.5608, 0.7256, 0.6667, 0.8295, 0.7707, 0.7124, 0.6547,\n 0.5974, 0.7570, 0.9152, 0.8577, 1.0141, 0.9567, 0.8997, 0.8433,\n 0.7873, 0.7318, 0.6768, 0.8296, 0.7746, 0.7201, 0.6660, 0.6124,\n 0.7625, 0.7089, 0.8575, 0.8040, 0.7509, 0.8978, 0.8447, 0.7921,\n 0.9372, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.7698,\n 0.7192, 0.6689, 0.6190, 0.5695, 0.7095, 0.6600, 0.7987, 0.7493,\n 0.7001, 0.8374, 0.7884, 0.7396, 0.8755, 0.8268, 0.7784, 0.7303,\n 0.6825, 0.6351, 0.5879, 0.7213, 0.6742, 0.6274, 0.5808, 0.5345,\n 0.6662, 0.6199, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.8704,\n 0.9981, 0.9520, 0.9062, 0.8607, 0.8154, 0.7703, 0.7255, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.6737, 0.7979, 0.7539, 0.8773, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.9897, 0.9461, 0.9027, 1.0232])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "83", + "Fraction of T in Greenlist": "41.7%", + "z-score": "5.44", + "p value": "2.61e-08", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -0.4714, -0.6547, -0.8165,\n -0.1925, 0.3651, 0.8704, 0.6667, 1.1209, 0.9258, 0.7454, 0.5774,\n 0.4201, 0.8165, 0.6623, 0.5164, 0.8819, 0.7385, 0.6019, 0.4714,\n 0.3464, 0.6794, 0.5556, 0.8729, 0.7505, 1.0541, 1.3480, 1.2247,\n 1.5076, 1.7823, 2.0494, 2.3094, 2.5627, 2.4351, 2.3113, 2.1909,\n 2.4345, 2.3163, 2.2011, 2.0889, 1.9795, 2.2133, 2.1054, 2.3333,\n 2.2269, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570, 2.2576, 2.4689,\n 2.3706, 2.5775, 2.4804, 2.3851, 2.5873, 2.4930, 2.4004, 2.3094,\n 2.2200, 2.4163, 2.3276, 2.2404, 2.4327, 2.6222, 2.8093, 2.9938,\n 2.9057, 3.0873, 3.2667, 3.4438, 3.6187, 3.5301, 3.4427, 3.3566,\n 3.5283, 3.4429, 3.3587, 3.2757, 3.1937, 3.3619, 3.2806, 3.2004,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.3764, 3.2998,\n 3.2242, 3.3826, 3.3075, 3.2332, 3.3895, 3.3156, 3.2426, 3.1704,\n 3.0989, 3.2525, 3.1814, 3.1111, 3.2627, 3.4130, 3.5620, 3.7097,\n 3.6389, 3.7852, 3.9302, 4.0740, 4.2167, 4.1457, 4.0753, 4.0056,\n 4.1464, 4.0771, 4.0085, 3.9404, 3.8730, 4.0119, 3.9448, 3.8784,\n 4.0158, 4.1522, 4.2877, 4.2212, 4.1552, 4.0898, 4.0249, 3.9606,\n 3.8968, 4.0301, 3.9666, 3.9036, 4.0356, 3.9729, 3.9107, 3.8490,\n 3.7878, 3.9181, 3.8571, 3.7966, 3.9258, 4.0541, 4.1816, 4.3083,\n 4.2475, 4.3733, 4.4983, 4.6225, 4.7460, 4.6850, 4.6245, 4.5644,\n 4.6867, 4.6268, 4.5674, 4.5083, 4.4497, 4.5707, 4.5123, 4.4544,\n 4.5744, 4.6938, 4.8125, 4.7544, 4.6968, 4.6395, 4.5826, 4.5260,\n 4.4698, 4.5871, 4.5311, 4.4754, 4.5918, 4.5364, 4.4813, 4.4265,\n 4.3721, 4.4873, 4.4331, 4.3792, 4.4936, 4.6074, 4.7206, 4.8333,\n 4.7792, 4.8913, 5.0027, 5.1137, 5.2241, 5.3340, 5.4433])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Madonna fired her trainer because she slept with her boyfriend.\nWith pronoun replaced: She slept with the trainer's boyfriend.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "94", + "Fraction of T in Greenlist": "47.2%", + "z-score": "7.24", + "p value": "2.18e-13", + "z-score_at_T": "tensor([1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 1.6330, 2.1170,\n 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 2.8868, 2.6605, 2.4495,\n 2.7815, 2.5820, 2.8977, 3.2004, 3.0096, 2.8284, 2.6558, 2.9439, 2.7778,\n 3.0551, 3.3235, 3.1623, 3.0072, 2.8577, 2.7136, 2.5744, 2.8301, 3.0792,\n 2.9424, 3.1844, 3.0509, 2.9212, 3.1558, 3.3853, 3.6098, 3.8297, 3.7009,\n 3.5753, 3.4528, 3.6667, 3.5466, 3.7559, 3.9614, 3.8431, 3.7273, 3.6141,\n 3.8146, 3.7033, 3.9001, 3.7905, 3.6831, 3.8759, 4.0657, 4.2528, 4.4371,\n 4.3301, 4.2251, 4.1219, 4.3026, 4.2008, 4.3788, 4.5544, 4.4537, 4.3546,\n 4.2571, 4.4296, 4.3333, 4.5034, 4.4083, 4.3146, 4.4820, 4.6476, 4.8113,\n 4.9731, 4.8797, 4.7875, 4.6967, 4.8561, 4.7662, 4.9237, 5.0795, 4.9904,\n 4.9023, 4.8154, 4.9691, 4.8830, 5.0350, 4.9497, 4.8655, 5.0156, 5.1643,\n 5.3116, 5.4576, 5.3736, 5.2906, 5.2085, 5.3526, 5.2713, 5.4140, 5.5556,\n 5.4747, 5.3947, 5.3156, 5.4554, 5.3769, 5.5155, 5.4377, 5.3606, 5.4977,\n 5.6338, 5.7689, 5.9029, 5.8260, 5.7498, 5.6743, 5.8069, 5.7320, 5.8635,\n 5.9941, 5.9196, 5.8458, 5.7726, 5.9019, 5.8292, 5.9575, 5.8853, 5.8138,\n 5.9409, 6.0671, 6.1926, 6.3172, 6.2458, 6.1750, 6.1047, 6.2282, 6.1584,\n 6.2810, 6.4028, 6.3333, 6.2644, 6.1961, 6.3168, 6.2489, 6.3688, 6.3013,\n 6.2342, 6.3532, 6.4715, 6.5891, 6.7060, 6.6391, 6.5727, 6.5067, 6.6227,\n 6.5571, 6.6724, 6.7869, 6.7217, 6.6568, 6.5924, 6.7061, 6.6421, 6.7551,\n 6.6914, 6.6282, 6.7404, 6.8520, 6.9631, 7.0736, 7.0104, 6.9477, 6.8853,\n 6.9950, 6.9330, 7.0420, 7.1506, 7.0888, 7.0273, 6.9663, 7.0741, 7.0133,\n 7.1205, 7.0601, 7.0000, 7.1065, 7.2125, 7.3180, 7.4231, 7.3631, 7.3034,\n 7.2441])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "134", + "# Tokens in Greenlist": "95", + "Fraction of T in Greenlist": "70.9%", + "z-score": "12.3", + "p value": "6.61e-35", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 1.6667, 1.1547, 0.7746, 1.4142, 1.9640, 2.4495,\n 2.1170, 1.8257, 1.5667, 2.0000, 2.4019, 2.7775, 3.1305, 3.4641,\n 3.2206, 3.5382, 3.3113, 3.6148, 3.4017, 3.2004, 3.0096, 3.2998,\n 3.5796, 3.8497, 4.1111, 4.3644, 4.1812, 4.0056, 4.2515, 4.4907,\n 4.7237, 4.9507, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 5.1121,\n 5.3199, 5.5234, 5.7229, 5.9186, 6.1107, 6.2993, 6.4846, 6.3333,\n 6.5158, 6.6953, 6.5485, 6.7254, 6.8995, 7.0711, 7.2400, 7.4066,\n 7.2648, 7.4294, 7.2910, 7.1554, 7.3183, 7.4790, 7.6376, 7.5056,\n 7.6624, 7.8174, 7.9704, 7.8416, 7.9931, 8.1428, 8.0167, 8.1650,\n 8.3116, 8.4566, 8.6000, 8.7419, 8.6192, 8.7599, 8.6393, 8.5206,\n 8.6603, 8.7986, 8.9355, 8.8192, 8.9550, 9.0896, 9.2229, 9.1088,\n 9.2410, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 10.0107, 10.1352,\n 10.2587, 10.3812, 10.2706, 10.3923, 10.5131, 10.4042, 10.5243, 10.6434,\n 10.7616, 10.8790, 10.7722, 10.8889, 11.0047, 11.1197, 11.0147, 10.9109,\n 10.8082, 10.9229, 10.8215, 10.9355, 11.0488, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.6139, 11.7222, 11.6242, 11.7320, 11.8392,\n 11.9457, 12.0516, 11.9551, 12.0605, 12.1652, 12.2694])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Papa looked down at the children's faces, so puzzled and sad now. It was bad enough that they had to be denied so many things because he couldn't afford them.\nWith pronoun replaced: He couldn't afford the children.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "45", + "Fraction of T in Greenlist": "22.6%", + "z-score": "-0.778", + "p value": "0.782", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 1.8074, 1.4142, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.6667, 0.4804, 0.9258, 1.3416, 1.7321,\n 2.1004, 2.4495, 2.2517, 2.0656, 1.8898, 1.7233, 1.5650, 1.4142,\n 1.7321, 1.5852, 1.8889, 1.7457, 2.0370, 1.8974, 1.7628, 1.6330,\n 1.5076, 1.3862, 1.2687, 1.5396, 1.4237, 1.3112, 1.2019, 1.0954,\n 0.9918, 1.2472, 1.1446, 1.0445, 0.9467, 0.8513, 0.7579, 0.6667,\n 0.5774, 0.8165, 0.7276, 0.9608, 1.1896, 1.4142, 1.6348, 1.8516,\n 1.7589, 1.6678, 1.5785, 1.4907, 1.4045, 1.3198, 1.5275, 1.4434,\n 1.6471, 1.5635, 1.7634, 1.6803, 1.5986, 1.5181, 1.4389, 1.3608,\n 1.2839, 1.2081, 1.1333, 1.0596, 0.9869, 0.9152, 0.8444, 0.7746,\n 0.7057, 0.6376, 0.5704, 0.5040, 0.4384, 0.6226, 0.5571, 0.4924,\n 0.4284, 0.3651, 0.3026, 0.2408, 0.4191, 0.3573, 0.2962, 0.2357,\n 0.1759, 0.1166, 0.0580, 0.0000, -0.0574, -0.1143, 0.0569, 0.0000,\n -0.0563, -0.1122, -0.1674, 0.0000, -0.0553, -0.1101, -0.1644, -0.2182,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.3705, -0.4216,\n -0.4724, -0.5227, -0.5726, -0.4148, -0.2582, -0.1029, -0.1537, -0.2041,\n -0.2542, -0.3038, -0.3531, -0.4020, -0.4506, -0.4988, -0.5466, -0.5941,\n -0.6412, -0.6881, -0.7346, -0.7807, -0.8266, -0.8721, -0.7242, -0.7698,\n -0.8151, -0.6689, -0.5238, -0.5695, -0.6149, -0.6600, -0.7048, -0.5620,\n -0.6068, -0.4652, -0.5101, -0.3698, -0.2304, -0.2756, -0.3205, -0.3651,\n -0.4095, -0.4536, -0.3166, -0.3607, -0.4045, -0.4481, -0.4914, -0.3563,\n -0.3997, -0.2657, -0.3091, -0.3522, -0.2195, -0.2626, -0.3055, -0.1741,\n -0.2170, -0.2596, -0.3021, -0.3443, -0.3862, -0.4280, -0.4695, -0.5108,\n -0.5518, -0.5927, -0.6333, -0.6737, -0.5459, -0.4189, -0.4595, -0.5000,\n -0.5403, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.7776])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 2.3094, 2.8402, 2.3570, 2.8368, 3.2660,\n 3.6566, 4.0166, 4.3519, 4.6667, 4.9640, 5.2463, 5.5156, 5.1962,\n 4.9010, 4.6268, 4.9008, 5.1640, 4.9135, 5.1698, 5.4174, 5.1855,\n 5.4271, 5.6614, 5.8889, 6.1101, 6.3255, 6.5354, 6.7402, 6.9402,\n 7.1358, 6.9310, 6.7337, 6.5433, 6.7390, 6.9307, 6.7489, 6.9378,\n 7.1232, 6.9488, 7.1317, 7.3113, 7.4878, 7.6613, 7.8320, 8.0000,\n 8.1654, 8.3283, 8.4887, 8.3267, 8.1684, 8.0139, 8.1742, 8.3324,\n 8.1825, 8.3391, 8.4936, 8.3480, 8.5010, 8.6522, 8.8015, 8.9489,\n 9.0947, 9.2387, 9.3811, 9.5219, 9.6612, 9.5229, 9.3871, 9.2536,\n 9.3927, 9.5304, 9.4000, 9.5366, 9.6719, 9.5443, 9.6786, 9.8116,\n 9.9433, 10.0737, 10.2030, 10.3310, 10.4579, 10.5837, 10.7084, 10.5859,\n 10.4650, 10.3459, 10.4704, 10.5940, 10.4770, 10.5997, 10.7215, 10.6066,\n 10.7277, 10.8477, 10.9669, 11.0851, 11.2025, 11.3189, 11.4345, 11.5492,\n 11.6631, 11.5519, 11.4420, 11.3333, 11.4471, 11.5601, 11.4531, 11.5655,\n 11.6772, 11.5718, 11.6829, 11.7932, 11.9029, 12.0118, 12.1200, 12.2275,\n 12.3343, 12.4405, 12.5460, 12.4434, 12.3419, 12.2414, 12.3468, 12.4516,\n 12.3524, 12.4567, 12.5604, 12.4625, 12.5657, 12.6684, 12.7704, 12.8719,\n 12.9728, 13.0732, 13.1730, 13.2722, 13.3710, 13.2753, 13.1806, 13.0866,\n 13.1852, 13.2834, 13.1905, 13.2882, 13.3854, 13.2936, 13.3905, 13.4868,\n 13.5827, 13.6781, 13.7730, 13.8675, 13.9615, 14.0550, 14.1481, 14.0582,\n 13.9690, 13.8804, 13.9735, 14.0660, 13.9784, 14.0707, 14.1625, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.4394, 14.5293, 14.6188, 14.7079, 14.7966,\n 14.8849, 14.7998, 14.7152, 14.6313, 14.7195, 14.8074, 14.7242, 14.8119,\n 14.8991, 14.8167, 14.9037, 14.9903, 15.0766, 15.1625, 15.2481, 15.3333,\n 15.4182, 15.5028, 15.5870, 15.5060, 15.4254, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: If the con artist has succeeded in fooling Sam, he would have gotten a lot of money.\nWith pronoun replaced: The con artist would have gotten a lot of money.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "65", + "Fraction of T in Greenlist": "32.7%", + "z-score": "2.5", + "p value": "0.00627", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 0.4804, 0.3086, 0.7454, 0.5774,\n 0.4201, 0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, 0.0000,\n 0.3015, 0.1980, 0.4880, 0.3849, 0.2847, 0.1873, 0.0925, 0.0000,\n -0.0902, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.0825, -0.1633, -0.2425, 0.0000, -0.0793, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.1516, 0.0752, 0.0000, -0.0739, 0.1466, 0.0727, 0.0000,\n -0.0716, -0.1421, -0.2116, -0.2801, -0.0695, -0.1380, -0.2056, 0.0000,\n -0.0676, 0.1342, 0.0667, 0.0000, 0.1974, 0.1307, 0.0650, 0.2582,\n 0.4491, 0.6376, 0.5704, 0.5040, 0.4384, 0.3735, 0.5571, 0.4924,\n 0.6732, 0.8520, 0.7868, 0.9631, 0.8980, 0.8337, 0.7701, 0.7071,\n 0.6448, 0.5832, 0.7543, 0.6928, 0.6319, 0.8003, 0.7395, 0.9058,\n 0.8452, 1.0094, 1.1721, 1.1111, 1.2719, 1.2111, 1.1508, 1.0911,\n 1.0319, 0.9733, 0.9152, 1.0721, 1.0141, 0.9567, 1.1114, 1.0541,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.3373, 1.2808, 1.2247,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.0973, 1.0435, 1.1882,\n 1.1345, 1.2778, 1.2243, 1.3663, 1.3128, 1.2597, 1.2070, 1.1547,\n 1.2946, 1.2423, 1.1905, 1.3288, 1.4662, 1.6028, 1.5505, 1.4985,\n 1.4470, 1.3957, 1.5303, 1.4792, 1.4284, 1.5617, 1.5110, 1.6432,\n 1.5926, 1.7237, 1.6732, 1.8033, 1.9327, 1.8821, 1.8317, 1.7817,\n 1.7321, 1.6827, 1.6336, 1.7609, 1.7119, 1.6632, 1.7894, 1.7408,\n 1.8660, 1.8175, 1.9419, 1.8935, 2.0170, 2.1398, 2.0913, 2.0430,\n 1.9950, 1.9473, 1.8999, 1.8527, 1.9738, 1.9267, 1.8799, 2.0000,\n 1.9533, 2.0726, 2.0259, 2.1444, 2.2624, 2.3798, 2.4966])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "112", + "Fraction of T in Greenlist": "56.3%", + "z-score": "10.2", + "p value": "1.09e-24", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, 0.3651, 0.1741, 0.6667, 1.1209, 0.9258, 1.3416, 1.7321,\n 2.1004, 1.9052, 2.2517, 2.5820, 2.3938, 2.2156, 2.0466, 2.3570,\n 2.1939, 2.4910, 2.3333, 2.6186, 2.8947, 2.7406, 3.0072, 2.8577,\n 3.1156, 3.3665, 3.6108, 3.4641, 3.7017, 3.5590, 3.7905, 3.6515,\n 3.8772, 4.0980, 3.9620, 4.1779, 4.0451, 4.2563, 4.4634, 4.6667,\n 4.5363, 4.4091, 4.6082, 4.8038, 4.9962, 4.8712, 5.0602, 4.9377,\n 4.8177, 5.0034, 5.1864, 5.3666, 5.5442, 5.7192, 5.8919, 6.0622,\n 5.9438, 6.1118, 6.2776, 6.1612, 6.0469, 5.9346, 5.8241, 5.9876,\n 6.1492, 6.0404, 5.9333, 5.8279, 5.9874, 6.1450, 6.3008, 6.4550,\n 6.6075, 6.5033, 6.4006, 6.2994, 6.1996, 6.3502, 6.2517, 6.4008,\n 6.3035, 6.2075, 6.1128, 6.2601, 6.1664, 6.0740, 5.9827, 5.8926,\n 5.8035, 5.7155, 5.8606, 5.7735, 5.6874, 5.8310, 5.9732, 5.8878,\n 6.0288, 5.9442, 6.0838, 6.0000, 6.1383, 6.2755, 6.4116, 6.5465,\n 6.4632, 6.5970, 6.7298, 6.8615, 6.9923, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.6734, 7.5910, 7.5094, 7.4286, 7.3485,\n 7.4724, 7.3930, 7.5161, 7.4373, 7.5595, 7.6808, 7.8014, 7.9212,\n 7.8429, 7.9619, 8.0801, 8.1976, 8.3143, 8.4303, 8.5456, 8.4678,\n 8.3906, 8.3140, 8.4286, 8.3526, 8.4664, 8.3910, 8.5041, 8.4293,\n 8.5417, 8.6535, 8.7647, 8.8752, 8.8008, 8.9107, 9.0200, 9.1287,\n 9.2368, 9.3443, 9.4513, 9.3774, 9.3040, 9.2311, 9.3374, 9.2651,\n 9.3708, 9.2990, 9.4042, 9.3328, 9.4375, 9.5416, 9.6452, 9.7483,\n 9.6774, 9.7800, 9.8821, 9.9837, 10.0848, 10.1855, 10.2856, 10.3853,\n 10.4846, 10.4140, 10.5128, 10.4427, 10.5410, 10.4713, 10.4021, 10.3333,\n 10.2650, 10.1970, 10.1295, 10.0624, 10.1602, 10.2576, 10.1909])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Mr. Taylor was a man of uncertain temper and his general tendency was to think that David was a poor chump and that whatever step he took in any direction on his own account was just another proof of his innate idiocy.\nWith pronoun replaced: Any direction on his own account was just another proof of David's innate idiocy.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "46", + "Fraction of T in Greenlist": "23.1%", + "z-score": "-0.614", + "p value": "0.73", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -0.7537, -0.8165,\n -0.6082, -0.6712, -0.7333, -0.5298, -0.3290, -0.1307, -0.1949, -0.2582,\n -0.3208, -0.1275, -0.1901, 0.0000, 0.1879, 0.1245, 0.0619, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.0000,\n -0.0586, -0.1166, -0.1741, -0.2309, -0.0574, -0.1143, -0.1707, -0.2265,\n -0.2817, -0.3365, -0.3907, -0.2222, -0.0553, 0.1101, 0.0548, 0.0000,\n -0.0543, -0.1081, -0.1615, -0.2144, -0.2669, -0.3189, -0.1588, -0.2108,\n -0.2624, -0.3136, -0.3644, -0.2074, -0.2582, -0.3086, -0.3586, -0.4082,\n -0.4575, -0.3038, -0.1513, -0.2010, -0.0501, -0.0998, -0.1491, -0.1980,\n -0.2466, -0.2949, -0.3428, -0.1952, -0.2431, -0.2907, -0.1448, -0.1925,\n -0.2397, -0.2867, -0.3333, -0.3797, -0.4257, -0.4714, -0.5168, -0.3746,\n -0.2334, -0.2791, -0.3246, -0.1849, -0.2304, -0.2756, -0.3205, -0.1826,\n -0.0455, -0.0907, -0.1357, -0.1803, -0.2247, -0.2689, -0.3127, -0.3563,\n -0.3997, -0.4428, -0.4857, -0.5283, -0.5706, -0.6128, -0.6547, -0.6963,\n -0.7377, -0.7789, -0.8199, -0.6885, -0.7295, -0.7703, -0.8109, -0.6810,\n -0.7216, -0.5927, -0.6333, -0.6737, -0.7139, -0.7539, -0.7937, -0.8333,\n -0.7065, -0.5803, -0.6202, -0.6598, -0.6993, -0.7385, -0.6139])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "73", + "Fraction of T in Greenlist": "36.7%", + "z-score": "3.81", + "p value": "7.05e-05", + "z-score_at_T": "tensor([ 1.7321, 2.4495, 1.6667, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, 0.4804, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.6794, -0.7778, -0.4364, -0.5361, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, -0.2928, -0.3849, -0.4746, -0.5620, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,\n -0.7423, -0.8165, -0.8893, -0.6405, -0.7137, -0.7857, -0.8563, -0.6172,\n -0.6882, -0.7581, -0.8268, -0.8944, -0.9610, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -1.0580, -1.1202, -0.9036, -0.9661, -1.0278, -1.0887,\n -0.8785, -0.6712, -0.7333, -0.5298, -0.3290, -0.1307, 0.0650, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, 0.1245, 0.0619, 0.2462,\n 0.4284, 0.6086, 0.5447, 0.4815, 0.6586, 0.5955, 0.7701, 0.7071,\n 0.8793, 0.8165, 0.7543, 0.6928, 0.6319, 0.5717, 0.5120, 0.4529,\n 0.6198, 0.5608, 0.5023, 0.6667, 0.6083, 0.5505, 0.7124, 0.8729,\n 0.8147, 0.7570, 0.9152, 1.0721, 1.0141, 0.9567, 0.8997, 1.0541,\n 1.2072, 1.1500, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 1.1692, 1.1140, 1.0593, 1.0050, 1.1514, 1.2968, 1.2423, 1.3862,\n 1.3318, 1.4744, 1.4201, 1.3663, 1.5073, 1.4535, 1.5933, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.5187, 1.4662, 1.4142, 1.3625, 1.4985,\n 1.6337, 1.5818, 1.7158, 1.8490, 1.7970, 1.7454, 1.8773, 2.0083,\n 1.9566, 1.9052, 1.8541, 1.9837, 2.1125, 2.0613, 2.1892, 2.1381,\n 2.2650, 2.2140, 2.3400, 2.2892, 2.2387, 2.3635, 2.3131, 2.4371,\n 2.3868, 2.3368, 2.2871, 2.4099, 2.5319, 2.4822, 2.6034, 2.7240,\n 2.8440, 2.7940, 2.9132, 3.0317, 3.1497, 3.2671, 3.3838, 3.5000,\n 3.6156, 3.7306, 3.8451, 3.7940, 3.7432, 3.6927, 3.8062])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the wind knocked it down.\nWith pronoun replaced: This afternoon the wind knocked The sand castle down.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 0.8165,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.1400, -0.2722, 0.1325, 0.0000, -0.1260, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, 0.3216, 0.2108, 0.1037, 0.0000,\n -0.1005, -0.1980, -0.2928, 0.0000, 0.2847, 0.1873, 0.0925, 0.3651,\n 0.2705, 0.1782, 0.0880, 0.0000, -0.0861, -0.1703, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.5449, -0.3086,\n -0.3824, -0.4549, -0.2255, -0.2981, -0.3696, -0.4399, -0.5092, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -1.0887,\n -1.1488, -0.9396, -0.7333, -0.7947, -0.8553, -0.6537, -0.7145, -0.7746,\n -0.8340, -0.8926, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.0719, -1.1255, -1.1785,\n -0.9966, -1.0498, -1.1025, -1.1547, -1.2064, -1.2577, -1.3084, -1.3587,\n -1.4086, -1.2337, -1.0605, -1.1111, -1.1613, -1.2111, -1.2604, -1.3093,\n -1.3578, -1.1896, -1.0229, -1.0721, -1.1209, -0.9567, -1.0056, -1.0541,\n -1.1022, -1.1500, -1.1973, -1.2443, -1.2910, -1.3373, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.6081, -1.4518, -1.2968, -1.3416, -1.3862,\n -1.2332, -1.2778, -1.3222, -1.3663, -1.4100, -1.4535, -1.4967, -1.5396,\n -1.5822, -1.4335, -1.2857, -1.3288, -1.3717, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.3957, -1.2521, -1.2943, -1.3362, -1.1942, -1.2362, -1.2780,\n -1.3195, -1.3608, -1.4019, -1.4427, -1.4832, -1.5236, -1.5637, -1.6036,\n -1.6432, -1.6827, -1.7219, -1.7609, -1.6241, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.6262, -1.6645, -1.7025,\n -1.7404, -1.6087, -1.4777, -1.5159, -1.5539, -1.5916, -1.6292, -1.6667,\n -1.7039, -1.5752, -1.4471, -1.4846, -1.5220, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "121", + "Fraction of T in Greenlist": "60.8%", + "z-score": "11.7", + "p value": "9.7e-32", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 1.0911, 1.6330,\n 2.1170, 2.5560, 2.2630, 2.0000, 1.7614, 2.1602, 1.9379, 2.3094,\n 2.6605, 2.9938, 3.3113, 3.0984, 2.8977, 2.7080, 3.0096, 2.8284,\n 3.1177, 3.3968, 3.6667, 3.9279, 4.1812, 4.0056, 3.8367, 4.0825,\n 4.3217, 4.5547, 4.7819, 5.0037, 5.2204, 5.0576, 4.8999, 4.7469,\n 4.5985, 4.8107, 4.6664, 4.8742, 4.7336, 4.9373, 5.1371, 5.3333,\n 5.5261, 5.3889, 5.2549, 5.1241, 5.3134, 5.1854, 5.0602, 5.2463,\n 5.4295, 5.6099, 5.7877, 5.9628, 5.8398, 6.0125, 6.1828, 6.3509,\n 6.2302, 6.1118, 6.2776, 6.4413, 6.3249, 6.4866, 6.3723, 6.2598,\n 6.4195, 6.3089, 6.4667, 6.6227, 6.7769, 6.9294, 6.8205, 6.7132,\n 6.8641, 7.0133, 7.1611, 7.3073, 7.4521, 7.5954, 7.4897, 7.3855,\n 7.5275, 7.6681, 7.8074, 7.7047, 7.8428, 7.7414, 7.6413, 7.5425,\n 7.6794, 7.8150, 7.9495, 8.0829, 7.9853, 8.1176, 8.2488, 8.1524,\n 8.0571, 7.9630, 8.0931, 8.2222, 8.3503, 8.2572, 8.3843, 8.2923,\n 8.2012, 8.1111, 8.0219, 8.1481, 8.2733, 8.3976, 8.5210, 8.4327,\n 8.5553, 8.6770, 8.7978, 8.9178, 8.8304, 8.9496, 9.0680, 9.1856,\n 9.0991, 9.2159, 9.1302, 9.0453, 8.9612, 9.0773, 9.1927, 9.3074,\n 9.4213, 9.3380, 9.4513, 9.5638, 9.4812, 9.3993, 9.3181, 9.4301,\n 9.5413, 9.6519, 9.5714, 9.6814, 9.6016, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.8776, 9.7997, 9.9067, 10.0131, 10.1189, 10.2242,\n 10.1469, 10.2516, 10.3557, 10.4594, 10.3827, 10.4858, 10.4097, 10.3341,\n 10.2591, 10.3617, 10.4638, 10.5654, 10.6665, 10.5921, 10.6927, 10.7928,\n 10.8925, 10.9917, 10.9178, 11.0165, 11.1148, 11.2126, 11.1392, 11.2366,\n 11.1637, 11.0913, 11.0194, 11.1164, 11.2129, 11.3091, 11.4047, 11.3333,\n 11.2624, 11.3577, 11.4525, 11.5470, 11.6411, 11.7347, 11.6643])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get it repaired.\nWith pronoun replaced: Now I have to get The roof repaired.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "37", + "Fraction of T in Greenlist": "18.6%", + "z-score": "-2.09", + "p value": "0.982", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -1.0954, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, -0.2722, -0.3974, -0.5164, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, 0.1111, 0.0000, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.5941, -0.6831, -0.3849, -0.4746, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -1.1431, -0.8893, -0.9608, -1.0310, -1.0999, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.8660,\n -0.9309, -0.9949, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -0.9152, -0.9744, -1.0328,\n -1.0906, -1.1476, -0.9506, -1.0079, -1.0646, -1.1206, -1.1761, -1.2309,\n -1.2852, -1.3389, -1.3920, -1.4446, -1.2572, -1.3101, -1.3624, -1.4142,\n -1.4655, -1.5164, -1.5667, -1.3856, -1.4362, -1.4863, -1.5360, -1.3587,\n -1.4086, -1.4580, -1.5070, -1.5556, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.5751, -1.6222, -1.6690, -1.7154, -1.7614, -1.8071, -1.6407, -1.6865,\n -1.7321, -1.7772, -1.6138, -1.6591, -1.7041, -1.7488, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.7655, -1.6081, -1.6521, -1.6958, -1.7392, -1.7823,\n -1.8251, -1.8676, -1.9098, -1.9518, -1.7990, -1.8411, -1.8829, -1.9245,\n -1.7740, -1.8157, -1.8571, -1.8983, -1.9392, -1.9799, -1.8324, -1.6859,\n -1.7270, -1.7679, -1.8086, -1.8490, -1.8892, -1.7454, -1.7857, -1.8257,\n -1.8656, -1.9052, -1.9445, -1.9837, -2.0226, -2.0613, -2.0998, -1.9599,\n -1.8209, -1.8598, -1.8985, -1.9370, -1.9753, -2.0134, -1.8767, -1.9149,\n -1.9528, -1.9906, -1.8556, -1.8935, -1.9311, -1.9686, -2.0059, -2.0430,\n -2.0799, -2.1167, -2.1532, -2.1896, -2.0578, -2.0943, -2.1306, -2.1667,\n -2.2026, -2.2384, -2.1086, -2.1444, -2.1801, -2.2156, -2.0873])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "145", + "Fraction of T in Greenlist": "72.9%", + "z-score": "15.6", + "p value": "4.04e-55", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, 0.0000, 0.7746, 1.4142, 1.9640, 1.6330,\n 2.1170, 1.8257, 1.5667, 2.0000, 1.7614, 2.1602, 1.9379, 1.7321,\n 1.5403, 1.9052, 2.2517, 2.5820, 2.8977, 3.2004, 3.4912, 3.7712,\n 4.0415, 3.8497, 3.6667, 3.4915, 3.7524, 4.0056, 4.2515, 4.4907,\n 4.3217, 4.1586, 4.3916, 4.6188, 4.4610, 4.6829, 4.8999, 4.7469,\n 4.5985, 4.4544, 4.6664, 4.8742, 5.0779, 5.2778, 5.1371, 5.3333,\n 5.5261, 5.7155, 5.9017, 6.0849, 6.2651, 6.4425, 6.6172, 6.7893,\n 6.9589, 7.1261, 7.2910, 7.1554, 7.0226, 6.8924, 7.0557, 7.2169,\n 7.3760, 7.5331, 7.4061, 7.2815, 7.1590, 7.3147, 7.4686, 7.6206,\n 7.7710, 7.9196, 8.0667, 8.2121, 8.3560, 8.4984, 8.6393, 8.7788,\n 8.9169, 9.0536, 9.1890, 9.3231, 9.4560, 9.5876, 9.7181, 9.6011,\n 9.4858, 9.3721, 9.5021, 9.6309, 9.7586, 9.8852, 9.7738, 9.6638,\n 9.5552, 9.4480, 9.5743, 9.6995, 9.8237, 9.9469, 10.0692, 10.1905,\n 10.3109, 10.4304, 10.5490, 10.6667, 10.7835, 10.8995, 11.0147, 11.1291,\n 11.2427, 11.3555, 11.4675, 11.3644, 11.2623, 11.1614, 11.2732, 11.3842,\n 11.4945, 11.6041, 11.5048, 11.4065, 11.3091, 11.2127, 11.3222, 11.4310,\n 11.5391, 11.6465, 11.7533, 11.8594, 11.9650, 12.0699, 12.1741, 12.2778,\n 12.3809, 12.4834, 12.5853, 12.6867, 12.7875, 12.8877, 12.9874, 12.8942,\n 12.8017, 12.7100, 12.8095, 12.9085, 13.0071, 13.1050, 13.0146, 12.9249,\n 12.8359, 12.7476, 12.8456, 12.9430, 13.0400, 13.1364, 13.2324, 13.3279,\n 13.4230, 13.5176, 13.6117, 13.7054, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.2584, 14.3491, 14.2633, 14.1781, 14.0936, 14.1842, 14.2744,\n 14.3642, 14.4536, 14.3700, 14.2870, 14.2046, 14.1227, 14.2121, 14.3011,\n 14.3897, 14.4780, 14.5659, 14.6534, 14.7406, 14.8274, 14.9139, 15.0000,\n 15.0858, 15.1712, 15.2563, 15.3410, 15.4254, 15.5095, 15.5933])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 0 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: I poured water from the bottle into the cup until it was empty.\nWith pronoun replaced: The bottle was empty.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "41", + "Fraction of T in Greenlist": "20.6%", + "z-score": "-1.43", + "p value": "0.924", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, 0.0000,\n 0.5774, 0.3651, 0.1741, 0.0000, -0.1601, -0.3086, -0.4472, -0.5774,\n -0.7001, -0.8165, -0.3974, -0.5164, -0.6299, -0.7385, -0.8427, -0.9428,\n -1.0392, -0.6794, -0.7778, -0.8729, -0.9649, -1.0541, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.4639, -1.1547, -0.8542, -0.5620, -0.6472, -0.7303,\n -0.8115, -0.8909, -0.9685, -1.0445, -1.1189, -1.1918, -0.9264, -1.0000,\n -1.0722, -1.1431, -1.2127, -1.2810, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.3820, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.1816, -1.2421, -1.3019, -1.0887,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.6341, -1.6854, -1.7362, -1.7865, -1.8363, -1.8856,\n -1.9345, -1.7496, -1.7988, -1.8475, -1.8958, -1.9437, -1.9911, -1.8116,\n -1.8593, -1.9066, -1.9535, -2.0000, -1.8249, -1.8716, -1.9180, -1.9640,\n -2.0096, -1.8385, -1.6690, -1.7154, -1.7614, -1.5945, -1.6407, -1.6865,\n -1.7321, -1.5681, -1.6138, -1.6591, -1.7041, -1.5430, -1.3833, -1.4289,\n -1.4742, -1.5191, -1.5637, -1.4071, -1.4518, -1.4963, -1.5404, -1.5842,\n -1.6278, -1.6710, -1.7140, -1.5614, -1.6045, -1.6473, -1.6898, -1.7321,\n -1.5822, -1.6246, -1.6667, -1.5187, -1.5608, -1.4142, -1.4565, -1.4985,\n -1.5403, -1.5818, -1.4376, -1.4792, -1.5206, -1.3779, -1.4194, -1.4606,\n -1.5016, -1.3608, -1.4019, -1.4427, -1.3035, -1.3443, -1.3850, -1.4254,\n -1.4656, -1.5055, -1.5453, -1.4087, -1.4485, -1.4881, -1.5275, -1.5667,\n -1.4321, -1.4713, -1.5104, -1.5492, -1.5878, -1.4551, -1.3230, -1.1918,\n -1.2310, -1.2700, -1.3088, -1.3474, -1.2179, -1.2566, -1.2950, -1.3333,\n -1.3714, -1.4093, -1.4471, -1.4846, -1.3574, -1.3950, -1.4325])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "144", + "Fraction of T in Greenlist": "72.4%", + "z-score": "15.4", + "p value": "5.18e-54", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 1.8074, 2.3570, 2.8368, 2.4495,\n 2.8868, 3.2863, 3.6556, 4.0000, 3.6829, 4.0119, 4.3231, 4.0415,\n 4.3409, 4.6268, 4.9008, 4.6476, 4.4096, 4.6775, 4.9358, 5.1855,\n 5.4271, 5.2085, 5.4444, 5.6737, 5.8966, 6.1137, 5.9106, 6.1237,\n 6.3317, 6.1389, 6.3434, 6.5433, 6.7390, 6.5561, 6.3791, 6.5727,\n 6.7625, 6.9488, 7.1317, 6.9631, 7.1435, 7.3208, 7.4952, 7.6667,\n 7.5056, 7.6751, 7.8420, 7.6862, 7.8512, 8.0139, 8.1742, 8.0238,\n 7.8766, 8.0358, 8.1929, 8.3480, 8.5010, 8.3589, 8.5105, 8.6603,\n 8.8082, 8.9544, 8.8168, 8.9618, 9.1051, 8.9709, 9.1130, 9.2536,\n 9.3927, 9.2620, 9.1333, 9.2717, 9.4087, 9.5443, 9.6786, 9.5534,\n 9.6867, 9.8187, 9.9495, 10.0791, 9.9570, 10.0857, 10.2132, 10.0935,\n 10.2202, 10.3459, 10.4704, 10.3532, 10.2375, 10.3615, 10.4846, 10.6066,\n 10.7277, 10.6145, 10.7348, 10.8542, 10.9727, 11.0902, 10.9794, 11.0963,\n 11.2124, 11.1033, 11.2187, 11.3333, 11.4471, 11.3399, 11.2339, 11.3473,\n 11.4599, 11.5718, 11.6829, 11.5788, 11.6894, 11.7992, 11.9083, 12.0167,\n 11.9144, 12.0223, 12.1295, 12.0286, 12.1353, 12.2414, 12.3468, 12.2474,\n 12.1491, 12.2541, 12.3586, 12.4625, 12.5657, 12.4689, 12.5717, 12.6739,\n 12.7755, 12.8766, 12.7812, 12.8819, 12.9820, 12.8877, 12.9874, 13.0866,\n 13.1852, 13.0922, 13.0000, 13.0984, 13.1962, 13.2936, 13.3905, 13.2995,\n 13.3960, 13.4920, 13.5876, 13.6826, 13.5929, 13.6876, 13.7818, 13.6931,\n 13.7870, 13.8804, 13.9735, 13.8857, 13.7986, 13.8914, 13.9838, 14.0758,\n 14.1673, 14.0813, 14.1725, 14.2633, 14.3537, 14.4437, 14.3587, 14.4484,\n 14.5378, 14.4536, 14.5426, 14.6313, 14.7195, 14.6362, 14.5535, 14.6416,\n 14.7293, 14.8167, 14.9037, 14.8219, 14.9086, 14.9950, 15.0810, 15.1667,\n 15.0858, 15.1712, 15.2563, 15.1761, 15.2609, 15.3454, 15.4296])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: Alice looked for her friend Jade in the crowd. Since she always wears a red turban, Alice spotted her quickly.\nWith pronoun replaced: Since Alice always wears a red turban, Alice spotted her quickly.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "35", + "Fraction of T in Greenlist": "17.6%", + "z-score": "-2.41", + "p value": "0.992", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n 0.4201, 0.2722, 0.6623, 0.5164, 0.3780, 0.2462, 0.1204, 0.0000,\n -0.1155, -0.2265, -0.3333, 0.0000, -0.1072, -0.2108, -0.3111, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.3482, -0.4303, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.6405, -0.7137, -0.7857, -0.8563, -0.9258,\n -0.9941, -1.0613, -1.1275, -1.1926, -1.2567, -1.3198, -1.0911, -1.1547,\n -1.2174, -1.2792, -1.3402, -1.4003, -1.4596, -1.2421, -1.3019, -1.3608,\n -1.1488, -1.2081, -1.2667, -1.3245, -1.3817, -1.4382, -1.2342, -1.2910,\n -1.3472, -1.4027, -1.4576, -1.5119, -1.3151, -1.3697, -1.4237, -1.4771,\n -1.5300, -1.5823, -1.3920, -1.4446, -1.4967, -1.5483, -1.5993, -1.6499,\n -1.4655, -1.5164, -1.5667, -1.6166, -1.6660, -1.7150, -1.5360, -1.5852,\n -1.6340, -1.6823, -1.7303, -1.7778, -1.6037, -1.6514, -1.6988, -1.7457,\n -1.7923, -1.8385, -1.6690, -1.7154, -1.7614, -1.8071, -1.8524, -1.8974,\n -1.7321, -1.7772, -1.8220, -1.8665, -1.9107, -1.9545, -1.7931, -1.8371,\n -1.8808, -1.9242, -1.9673, -2.0101, -1.8523, -1.8953, -1.9379, -1.9803,\n -2.0224, -2.0642, -1.9098, -1.9518, -1.9935, -2.0349, -2.0761, -2.1170,\n -1.9658, -2.0068, -2.0476, -2.0881, -2.1284, -2.1685, -2.0203, -2.0605,\n -2.1004, -2.1401, -2.1796, -2.2188, -2.0735, -2.1128, -2.1520, -2.1909,\n -2.2296, -2.2680, -2.1254, -2.1640, -2.2024, -2.2406, -2.2785, -2.3163,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.2258, -2.2630,\n -2.3000, -2.3368, -2.3734, -2.4099, -2.2744, -2.3110, -2.3473, -2.3835,\n -2.4195, -2.4553, -2.3221, -2.3580, -2.3938, -2.4294, -2.4648, -2.5000,\n -2.3688, -2.4042, -2.4393, -2.4744, -2.5092, -2.5439, -2.4147])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "86", + "Fraction of T in Greenlist": "43.2%", + "z-score": "5.93", + "p value": "1.47e-09", + "z-score_at_T": "tensor([1.7321, 0.8165, 0.3333, 1.1547, 0.7746, 0.4714, 0.2182, 0.8165, 1.3472,\n 1.8257, 2.2630, 2.6667, 2.4019, 2.7775, 3.1305, 2.8868, 3.2206, 2.9938,\n 2.7815, 3.0984, 2.8977, 2.7080, 2.5281, 2.8284, 3.1177, 2.9439, 2.7778,\n 3.0551, 2.8947, 2.7406, 2.5924, 2.8577, 2.7136, 2.9704, 2.8301, 2.6943,\n 2.5627, 2.8098, 2.6811, 2.5560, 2.7952, 3.0290, 2.9055, 2.7852, 2.6681,\n 2.8943, 2.7791, 2.6667, 2.8868, 2.7761, 2.6679, 2.5621, 2.7757, 2.9856,\n 3.1918, 3.3947, 3.5942, 3.4873, 3.6831, 3.8759, 3.7700, 3.9595, 3.8552,\n 3.7528, 3.9386, 3.8376, 3.7383, 3.6407, 3.8228, 4.0024, 3.9056, 3.8103,\n 3.9869, 3.8927, 3.8000, 3.7087, 3.8819, 3.7916, 3.9624, 3.8730, 3.7849,\n 3.6979, 3.8657, 3.7796, 3.6947, 3.8600, 4.0234, 3.9389, 3.8555, 3.7732,\n 3.9340, 3.8523, 3.7717, 3.9302, 3.8503, 3.7712, 3.6931, 3.8492, 4.0038,\n 4.1569, 4.3086, 4.4590, 4.3804, 4.5291, 4.6765, 4.5983, 4.7442, 4.6667,\n 4.5899, 4.7341, 4.6580, 4.5826, 4.5079, 4.6503, 4.7916, 4.7173, 4.6437,\n 4.7834, 4.7104, 4.6380, 4.5663, 4.7044, 4.6332, 4.7700, 4.6992, 4.6291,\n 4.5596, 4.6949, 4.6258, 4.5573, 4.6912, 4.8242, 4.7559, 4.6883, 4.6212,\n 4.7527, 4.6860, 4.6198, 4.7501, 4.6843, 4.6191, 4.5543, 4.6832, 4.8113,\n 4.9385, 5.0649, 5.1905, 5.1255, 5.2501, 5.3740, 5.3092, 5.4322, 5.3677,\n 5.3038, 5.4257, 5.3621, 5.2989, 5.2362, 5.3571, 5.4772, 5.4147, 5.3526,\n 5.4718, 5.4100, 5.3487, 5.2877, 5.4059, 5.3452, 5.4626, 5.4023, 5.3423,\n 5.2827, 5.3991, 5.3398, 5.2809, 5.3964, 5.5113, 5.4526, 5.3941, 5.3361,\n 5.4501, 5.3923, 5.3349, 5.4480, 5.3909, 5.3340, 5.2775, 5.3898, 5.5015,\n 5.6126, 5.7233, 5.8333, 5.7766, 5.8861, 5.9950, 5.9385, 6.0468, 5.9905,\n 5.9345])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 1, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: The dog chased the cat, which ran up a tree. It waited at the top.\nWith pronoun replaced: The cat waited at the top.\nEntailment:", + "true_label": 1, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "96", + "Fraction of T in Greenlist": "48.2%", + "z-score": "7.57", + "p value": "1.84e-14", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.5361, -0.6325, -0.7259, -0.8165,\n -0.9045, -0.5941, -0.6831, -0.7698, -0.8542, -0.9366, -1.0170, -1.0954,\n -1.1722, -0.8909, -0.9685, -1.0445, -0.7746, -0.8513, -0.9264, -1.0000,\n -1.0722, -0.8165, -0.8893, -0.9608, -1.0310, -1.0999, -1.1677, -1.2344,\n -0.9941, -1.0613, -1.1275, -0.8944, -0.6653, -0.7332, -0.8001, -0.5774,\n -0.6445, -0.7107, -0.7759, -0.8402, -0.9036, -0.9661, -1.0278, -0.8165,\n -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.0328,\n -1.0906, -1.1476, -1.2041, -1.2599, -1.3151, -1.3697, -1.4237, -1.2309,\n -1.2852, -1.3389, -1.1499, -0.9631, -1.0178, -0.8337, -0.6516, -0.4714,\n -0.5276, -0.5832, -0.4062, -0.2309, -0.2872, -0.3430, -0.1707, 0.0000,\n -0.0563, 0.1122, 0.2791, 0.4444, 0.3871, 0.5505, 0.7124, 0.8729,\n 0.8147, 0.9733, 1.1306, 1.2865, 1.2276, 1.3819, 1.5348, 1.6865,\n 1.6271, 1.7772, 1.9261, 2.0739, 2.0140, 2.1602, 2.3054, 2.4495,\n 2.3891, 2.5318, 2.6735, 2.8141, 2.7534, 2.8928, 3.0311, 3.1685,\n 3.1076, 3.2437, 3.3789, 3.5132, 3.4521, 3.5853, 3.7176, 3.8490,\n 3.7878, 3.9181, 4.0476, 4.1763, 4.1150, 4.0541, 4.1816, 4.3083,\n 4.2475, 4.3733, 4.4983, 4.6225, 4.5617, 4.6850, 4.8076, 4.9295,\n 4.8687, 4.9897, 5.1100, 5.2297, 5.1689, 5.2877, 5.4059, 5.5234,\n 5.4626, 5.5794, 5.6955, 5.8110, 5.7503, 5.8650, 5.9792, 6.0927,\n 6.0321, 6.1449, 6.2572, 6.3689, 6.3084, 6.4194, 6.5299, 6.6398,\n 6.5794, 6.6887, 6.7974, 6.9056, 6.8454, 6.9530, 7.0601, 7.1667,\n 7.1065, 7.2125, 7.3180, 7.4231, 7.3631, 7.4676, 7.5715])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "70", + "Fraction of T in Greenlist": "35.2%", + "z-score": "3.32", + "p value": "0.000458", + "z-score_at_T": "tensor([-0.5774, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, -0.6547, -0.8165,\n -0.9623, -0.3651, -0.5222, -0.6667, -0.1601, 0.3086, 0.1491, 0.0000,\n -0.1400, 0.2722, 0.1325, 0.0000, -0.1260, -0.2462, -0.3612, -0.4714,\n -0.5774, -0.2265, -0.3333, -0.4364, -0.1072, 0.2108, 0.1037, 0.0000,\n -0.1005, 0.1980, 0.0976, 0.3849, 0.2847, 0.1873, 0.4623, 0.7303,\n 0.6312, 0.5345, 0.4402, 0.6963, 0.6025, 0.5108, 0.4211, 0.3333,\n 0.2474, 0.1633, 0.0808, 0.3203, 0.2379, 0.1571, 0.3892, 0.3086,\n 0.2294, 0.4549, 0.6765, 0.5963, 0.8131, 0.7332, 0.6547, 0.8660,\n 0.7877, 0.7107, 0.9169, 1.1202, 1.0426, 1.2421, 1.4389, 1.6330,\n 1.8245, 2.0135, 2.2000, 2.3842, 2.3028, 2.4841, 2.6632, 2.8402,\n 3.0151, 3.1879, 3.1052, 3.0237, 2.9433, 3.1129, 3.2806, 3.2004,\n 3.3659, 3.5298, 3.6919, 3.6116, 3.5322, 3.4538, 3.6133, 3.7712,\n 3.9276, 3.8492, 3.7717, 3.9260, 3.8490, 4.0016, 4.1528, 4.3027,\n 4.2258, 4.1497, 4.0745, 4.0000, 4.1475, 4.0736, 4.0004, 3.9279,\n 3.8562, 3.7852, 3.7148, 3.6452, 3.7897, 3.7205, 3.6519, 3.7947,\n 3.9365, 3.8680, 3.8002, 3.7330, 3.6664, 3.8061, 3.7399, 3.6742,\n 3.6091, 3.5446, 3.4806, 3.4171, 3.3542, 3.4913, 3.4286, 3.3665,\n 3.5022, 3.6369, 3.5748, 3.5132, 3.4521, 3.3915, 3.5245, 3.4641,\n 3.5960, 3.5359, 3.4762, 3.6068, 3.7366, 3.6770, 3.6178, 3.5590,\n 3.5007, 3.6289, 3.5708, 3.5131, 3.4558, 3.3989, 3.3424, 3.2863,\n 3.2306, 3.3567, 3.3012, 3.2460, 3.3710, 3.4953, 3.4401, 3.3853,\n 3.3309, 3.2768, 3.3996, 3.3457, 3.4677, 3.4140, 3.3606, 3.4816,\n 3.6019, 3.5485, 3.4954, 3.4427, 3.3902, 3.5093, 3.4570, 3.4050,\n 3.3534, 3.3020, 3.2509, 3.2002, 3.1497, 3.2671, 3.2167, 3.1667,\n 3.2831, 3.3990, 3.3489, 3.2991, 3.2496, 3.2004, 3.3151])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: It was a summer afternoon, and the dog was sitting in the middle of the lawn. After a while, it got up and moved to a spot under the tree, because it was hot.\nWith pronoun replaced: The spot under the tree was hot.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "63", + "Fraction of T in Greenlist": "31.7%", + "z-score": "2.17", + "p value": "0.015", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, 0.7746, 0.4714, 0.2182, 0.0000,\n 0.5774, 1.0954, 0.8704, 0.6667, 1.1209, 1.5430, 1.9379, 1.7321,\n 1.5403, 1.3608, 1.1921, 1.0328, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.2702, 1.1323, 1.4444, 1.7457, 1.6082, 1.4757, 1.7628, 2.0412,\n 2.3116, 2.1783, 2.0494, 1.9245, 1.8034, 1.6859, 1.9415, 1.8257,\n 2.0738, 1.9599, 1.8489, 1.7408, 1.9795, 2.2133, 2.1054, 2.0000,\n 2.2269, 2.4495, 2.6679, 2.5621, 2.4585, 2.3570, 2.2576, 2.1602,\n 2.3706, 2.2743, 2.4804, 2.3851, 2.2916, 2.1997, 2.4004, 2.5981,\n 2.5064, 2.4163, 2.6098, 2.5205, 2.4327, 2.3462, 2.2611, 2.1773,\n 2.3651, 2.2819, 2.2000, 2.1193, 2.0397, 2.2226, 2.1436, 2.0656,\n 1.9887, 1.9127, 2.0913, 2.0158, 1.9413, 1.8677, 1.7951, 1.7233,\n 1.8972, 1.8257, 1.7552, 1.6854, 1.6164, 1.5483, 1.4809, 1.6499,\n 1.5828, 1.5164, 1.6828, 1.8475, 1.7809, 1.7150, 1.6498, 1.5852,\n 1.5213, 1.4580, 1.3954, 1.3333, 1.2719, 1.2111, 1.1508, 1.3093,\n 1.2492, 1.1896, 1.1306, 1.2865, 1.2276, 1.3819, 1.3231, 1.4757,\n 1.4171, 1.3590, 1.3014, 1.2443, 1.1877, 1.1316, 1.0759, 1.0206,\n 0.9658, 0.9115, 0.8575, 1.0050, 0.9512, 1.0973, 1.0435, 0.9901,\n 0.9372, 1.0812, 1.2243, 1.1711, 1.1183, 1.2597, 1.4001, 1.5396,\n 1.4863, 1.4335, 1.3810, 1.3288, 1.2771, 1.4142, 1.3625, 1.4985,\n 1.4470, 1.3957, 1.3448, 1.4792, 1.6127, 1.5617, 1.5110, 1.6432,\n 1.7746, 1.9052, 1.8541, 1.8033, 1.7529, 1.7028, 1.6530, 1.7817,\n 1.7321, 1.8598, 1.8102, 1.7609, 1.7119, 1.8383, 1.9640, 1.9149,\n 1.8660, 1.9906, 2.1145, 2.2377, 2.1886, 2.1398, 2.0913, 2.0430,\n 1.9950, 2.1167, 2.0688, 2.1896, 2.1418, 2.0943, 2.0470, 2.1667,\n 2.2857, 2.2384, 2.1913, 2.3094, 2.2624, 2.2156, 2.1691])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "57", + "Fraction of T in Greenlist": "28.6%", + "z-score": "1.19", + "p value": "0.118", + "z-score_at_T": "tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,\n -1.7321, -1.0954, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -0.5774,\n -0.7001, -0.8165, -0.9272, -1.0328, -0.6299, -0.7385, -0.8427, -0.9428,\n -0.5774, -0.6794, -0.3333, -0.4364, -0.1072, -0.2108, -0.3111, -0.4082,\n -0.5025, -0.1980, 0.0976, 0.0000, -0.0949, -0.1873, -0.2774, -0.3651,\n -0.4508, -0.5345, -0.6163, -0.6963, -0.7746, -0.5108, -0.2526, -0.3333,\n -0.4124, -0.4899, -0.5659, -0.3203, -0.3965, -0.1571, 0.0778, 0.0000,\n -0.0765, 0.1516, 0.0752, 0.0000, -0.0739, -0.1466, -0.2182, -0.2887,\n -0.3581, -0.4264, -0.4937, -0.2801, -0.0695, -0.1380, -0.2056, -0.2722,\n -0.3379, -0.1342, -0.2000, 0.0000, 0.1974, 0.3922, 0.3248, 0.2582,\n 0.1925, 0.1275, 0.0634, 0.0000, -0.0626, -0.1245, -0.1857, 0.0000,\n 0.1836, 0.1217, 0.0605, 0.0000, -0.0599, 0.1191, 0.0592, 0.2357,\n 0.4103, 0.5832, 0.5222, 0.4619, 0.4021, 0.3430, 0.2844, 0.2265,\n 0.1690, 0.1122, 0.0558, 0.2222, 0.3871, 0.3303, 0.2740, 0.2182,\n 0.1629, 0.3244, 0.2692, 0.4288, 0.5871, 0.7441, 0.6880, 0.6325,\n 0.5774, 0.5227, 0.4685, 0.4148, 0.3615, 0.3086, 0.2562, 0.4082,\n 0.5592, 0.5064, 0.4540, 0.4020, 0.3504, 0.4988, 0.4472, 0.5941,\n 0.7399, 0.8847, 0.8325, 0.7807, 0.7293, 0.6783, 0.6276, 0.5774,\n 0.5274, 0.4778, 0.4286, 0.5695, 0.7095, 0.6600, 0.6108, 0.5620,\n 0.5134, 0.6513, 0.6029, 0.7396, 0.8755, 1.0105, 0.9615, 0.9129,\n 0.8645, 0.8165, 0.7688, 0.7213, 0.6742, 0.6274, 0.5808, 0.7127,\n 0.8438, 0.7971, 0.7506, 0.7044, 0.6584, 0.7878, 0.7419, 0.8704,\n 0.9981, 1.1251, 1.0788, 1.0328, 0.9870, 0.9415, 0.8963, 0.8513,\n 0.8065, 0.7620, 0.7177, 0.8422, 0.9659, 0.9215, 0.8773, 0.8333,\n 0.7896, 0.9119, 0.8682, 0.9897, 1.1106, 1.2309, 1.1869])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 1 + }, + { + "prompt": "Determine if the sentence with the pronoun replaced is entailed by the original sentence. Answer 'yes' for entailment or 'no' for not entailment:\nOriginal: George got free tickets to the play, but he gave them to Eric, because he was not particularly eager to see it.\nWith pronoun replaced: Eric was not particularly eager to see it.\nEntailment:", + "true_label": 0, + "use_sampling": true, + "sampling_temp": 0.7, + "delta": 2.0, + "gamma": 0.25, + "n_beams": 1, + "detection_z_threshold": 4.0, + "without_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "36", + "Fraction of T in Greenlist": "18.1%", + "z-score": "-2.25", + "p value": "0.988", + "z-score_at_T": "tensor([ 1.7321, 0.8165, 0.3333, 0.0000, -0.2582, -0.4714, 0.2182, 0.0000,\n -0.1925, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -1.0435, -1.1547,\n -1.2603, -1.3608, -0.9272, -1.0328, -1.1339, -1.2309, -1.3242, -1.4142,\n -1.5011, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,\n -1.3065, -1.3862, -1.0735, -1.1547, -1.2339, -1.3112, -1.3868, -1.4606,\n -1.1722, -1.2472, -1.3207, -1.3926, -1.4631, -1.1918, -1.2632, -1.3333,\n -1.4021, -1.4697, -1.5361, -1.2810, -1.3482, -1.4142, -1.1677, -1.2344,\n -1.3000, -1.3646, -1.4281, -1.4907, -1.5524, -1.6131, -1.6730, -1.7321,\n -1.7903, -1.8477, -1.9044, -1.9604, -2.0156, -2.0702, -2.1241, -2.1773,\n -2.2299, -2.2819, -2.3333, -2.3842, -2.4344, -2.4841, -2.2735, -2.0656,\n -2.1170, -2.1678, -2.2180, -2.2678, -2.3170, -2.3658, -2.4140, -2.2156,\n -2.2644, -2.3126, -2.3604, -2.1669, -2.2151, -2.2629, -2.0732, -2.1213,\n -2.1690, -2.2162, -2.2630, -2.3094, -2.3554, -2.4010, -2.4462, -2.4910,\n -2.5355, -2.5796, -2.6233, -2.6667, -2.7097, -2.7524, -2.7948, -2.8368,\n -2.8786, -2.9200, -2.9611, -2.7875, -2.8289, -2.8701, -2.9109, -2.7406,\n -2.7818, -2.8226, -2.8632, -2.9035, -2.9435, -2.9832, -3.0227, -3.0619,\n -3.1008, -3.1395, -2.9762, -3.0151, -3.0538, -3.0923, -3.1305, -3.1685,\n -3.2062, -3.2437, -3.2810, -3.3181, -3.3549, -3.3915, -3.4279, -3.4641,\n -3.3083, -3.3447, -3.3810, -3.4170, -3.4528, -3.4884, -3.5238, -3.3717,\n -3.4073, -3.4428, -3.2925, -3.1433, -3.1794, -3.2152, -3.0677, -3.1038,\n -2.9576, -2.9938, -2.8490, -2.7050, -2.5620, -2.4198, -2.2785, -2.1381,\n -2.1762, -2.2140, -2.2517, -2.2892, -2.3264, -2.3635, -2.4004, -2.4371,\n -2.4736, -2.3368, -2.3734, -2.4099, -2.4461, -2.4822, -2.5181, -2.5538,\n -2.4195, -2.4553, -2.4910, -2.5265, -2.5618, -2.5969, -2.4648, -2.5000,\n -2.3688, -2.2384, -2.2740, -2.3094, -2.1801, -2.2156, -2.2510])", + "z-score Threshold": "4.0", + "Prediction": "Human/Unwatermarked" + }, + "with_watermark": { + "Tokens Counted (T)": "199", + "# Tokens in Greenlist": "118", + "Fraction of T in Greenlist": "59.3%", + "z-score": "11.2", + "p value": "2.76e-29", + "z-score_at_T": "tensor([-0.5774, -0.8165, 0.3333, 0.0000, -0.2582, 0.4714, 0.2182, 0.8165,\n 1.3472, 1.8257, 2.2630, 2.6667, 3.0424, 2.7775, 2.5342, 2.3094,\n 2.1004, 1.9052, 1.7219, 1.5492, 1.3859, 1.2309, 1.5650, 1.4142,\n 1.7321, 2.0381, 1.8889, 2.1822, 2.4659, 2.3190, 2.5924, 2.4495,\n 2.7136, 2.9704, 3.2205, 3.0792, 3.3221, 3.1844, 3.4207, 3.2863,\n 3.1558, 3.3853, 3.2577, 3.1334, 3.3566, 3.5753, 3.7897, 4.0000,\n 3.8765, 4.0825, 3.9614, 3.8431, 4.0446, 3.9284, 4.1260, 4.0119,\n 3.9001, 3.7905, 3.6831, 3.8759, 3.7700, 3.9595, 3.8552, 4.0415,\n 4.2251, 4.4061, 4.3026, 4.4809, 4.6568, 4.8305, 4.7278, 4.6268,\n 4.7977, 4.6981, 4.6000, 4.7683, 4.6715, 4.8375, 5.0017, 5.1640,\n 5.3245, 5.4832, 5.6401, 5.5435, 5.4482, 5.6032, 5.7566, 5.6622,\n 5.5691, 5.4772, 5.3865, 5.2970, 5.4480, 5.5976, 5.7458, 5.8926,\n 6.0380, 6.1820, 6.0927, 6.2354, 6.3768, 6.5169, 6.4283, 6.3408,\n 6.4795, 6.6171, 6.5303, 6.6667, 6.8019, 6.9361, 6.8500, 6.9830,\n 7.1149, 7.0296, 6.9451, 7.0759, 6.9923, 7.1220, 7.2508, 7.3786,\n 7.5056, 7.6315, 7.7566, 7.8808, 8.0042, 8.1266, 8.2483, 8.3691,\n 8.2858, 8.4057, 8.5249, 8.6433, 8.7610, 8.8778, 8.9940, 9.1094,\n 9.2240, 9.3380, 9.2554, 9.3686, 9.4812, 9.5931, 9.7044, 9.6225,\n 9.5413, 9.4608, 9.5714, 9.4916, 9.4124, 9.5224, 9.4438, 9.5532,\n 9.6619, 9.7701, 9.6921, 9.7997, 9.9067, 10.0131, 9.9357, 10.0416,\n 10.1469, 10.2516, 10.1749, 10.0987, 10.2029, 10.1273, 10.0523, 10.1559,\n 10.0814, 10.1846, 10.2872, 10.3893, 10.4909, 10.5921, 10.6927, 10.6187,\n 10.5453, 10.6455, 10.7451, 10.6722, 10.5998, 10.5278, 10.4563, 10.5556,\n 10.4846, 10.4140, 10.5128, 10.6111, 10.7090, 10.8064, 10.9034, 11.0000,\n 10.9299, 11.0261, 11.1218, 11.2171, 11.1475, 11.0782, 11.1731])", + "z-score Threshold": "4.0", + "Prediction": "Watermarked", + "Confidence": "100.000%" + }, + "predicted_label_without_watermark": 0, + "predicted_label_with_watermark": 0 + } + ], + "metrics": { + "accuracy_without_watermark": 0.4788732394366197, + "accuracy_with_watermark": 0.49295774647887325, + "f1_without_watermark": 0.47845940043676793, + "f1_with_watermark": 0.49043062200956933 + } + } + } +} \ No newline at end of file